#rm(list = ls())
library(tidyverse)
── Attaching core tidyverse packages ─────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.3     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.2     ── Conflicts ───────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(future)
library(ggthemes)
set.seed(1245264)

In this notebook we will test the performance of varKode to distinguish species of Stigmaphyllon and figure out the best parameters for training a dataset.

Kmer size and amount of data

To start, we produced images using different kmer sizes. We expect that shorter kmers will offer lower resolution to resolve species, but they will also create smaller files that require less computation. Here we will test whether images based on longer kmers result in higher accuracy. As an example, here are images produced from 200Mb of data for the same sample, but with different kmer sizes (5-9):

knitr::include_graphics(paste0('images_',5:9,'/S_bannisterioides+S-91_00200000K.png'))

We also used different amounts of data to produce images, since we want to figure out the lowest amount needed to distinguish species. With less data, figures get more noisy since chance plays a bigger role in the observed kmer frequencies. This should be more severe for larger kmer sizes, since each kmer will be more unique in the genome.

For example, images for 6-mers for the same sample as above, for 500Kb and 200Mb:

knitr::include_graphics(paste0('images_6/S_bannisterioides+S-91_00',c('000500','200000'),'K.png'))

The same, but for 8-mers:

knitr::include_graphics(paste0('images_8/S_bannisterioides+S-91_00',c('000500','200000'),'K.png'))

Now that we understand the differences between images, let’s understand the effect in accuracy. We previously trained CNN models to recognize images for a combination of kmer sizes and amount of data, with 10 replicates for each combination. In each replicate, we kept 3 randomly chosen samples per species as a validation set and checked the accuracy of the trained model in guessing the species of these samples, for different amounts of data used for the validation sample. What we want is to find:

1 - The lowest kmer size to produce high accuracy

2 - The lowest amount of data needed

3 - Whether the amount of data used for training and for querying must be similar.

The results of these simulations were saved as a csv table, let’s load it (ignoring the first, index column):

df = read_csv('kmerSize_VS_bp.csv')[-1]
New names:Rows: 4500 Columns: 11── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (3): bp_training, samples_training, samples_valid
dbl (8): ...1, kmer_size, replicate, bp_valid, n_samp_training, n_samp_valid, valid_loss, valid_acc
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df

Now let’s make sure bp_training and bp_valid are treated as ordered factors for nice plotting:

# Factor levels shared by bp_training and bp_valid: every single data amount
# (converted from bp to Mb and sorted numerically) followed by 'all'.
# Entries of bp_training containing '|' are the pooled ones that become 'all'.
single_amounts = unique(df$bp_training[!str_detect(df$bp_training, '\\|')])
not_all = as.character(sort(as.integer(single_amounts) / 1e6))
ordered_levels = c(not_all, 'all')

df = df %>%
  mutate(
    # Blank out the pooled entries first, so as.integer() only sees plain
    # numbers. A coercion warning now means a genuinely unexpected value
    # instead of firing on every expected 'all' row.
    bp_training = if_else(str_detect(bp_training, '\\|'), NA_character_, bp_training),
    bp_training = replace_na(as.character(as.integer(bp_training) / 1e6), 'all'),
    bp_training = factor(bp_training,
                         levels = ordered_levels,
                         ordered = TRUE),
    bp_valid = factor(as.character(as.integer(bp_valid) / 1e6),
                      levels = ordered_levels,
                      ordered = TRUE),
    # kmer sizes are ordered numerically, then stored as an ordered factor
    kmer_size = factor(as.character(kmer_size),
                       levels = as.character(sort(unique(kmer_size))),
                       ordered = TRUE)
  )
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `bp_training = as.character(as.integer(bp_training)/1e+06)`.
Caused by warning:
! NAs introduced by coercion
df

Let’s summarize these results in a table so we can put some numbers in the paper:

# Minimum, mean and maximum validation accuracy for each combination of
# kmer size and training data amount. `.groups = 'drop'` returns an ungrouped
# table, so no grouping lingers and summarise() does not emit its message.
df %>%
  group_by(kmer_size, bp_training) %>%
  summarize(min_valid = min(valid_acc),
            mean_valid = mean(valid_acc),
            max_valid = max(valid_acc),
            .groups = 'drop')
`summarise()` has grouped output by 'kmer_size'. You can override using the `.groups` argument.

Now we can plot:

# Facet-strip labeller: prefixes each facet value with 'kmer length:'.
kmer_labeller = as_labeller(function(value) paste0('kmer length:', value))

# One jittered point per row of df: x = data amount in the training images,
# y = data amount in the validation images, colour = validation accuracy
# (colour scale fixed to 0-1). One facet column per kmer size.
ggplot(df) +
  geom_jitter(aes(x = bp_training, y = bp_valid, color = valid_acc)) +
  scale_color_viridis_c('Validation\naccuracy', option = 'inferno', limits = c(0,1)) +
  facet_grid(~kmer_size, labeller = kmer_labeller) +
  coord_equal() +
  xlab('Data in training images (Mb)') +
  ylab('Data in validation images (Mb)')

NA

Now a version with averaged accuracy

# Heatmap of the mean validation accuracy for every combination of kmer size,
# training data amount and validation data amount (one facet per kmer size).
# `.groups = 'drop'` hands an ungrouped table to ggplot and avoids the
# summarise() grouping message.
p = df %>%
  group_by(kmer_size, bp_training, bp_valid) %>%
  summarize(valid_acc = mean(valid_acc), .groups = 'drop') %>%
  ggplot(aes(x = bp_training, y = bp_valid, fill = valid_acc)) +
  geom_raster() +
  #geom_text(aes(label=sprintf(100*valid_acc,fmt='%2.0f')),size=4.5*5/14) +
  scale_fill_viridis_c('Average\nvalidation\naccuracy', option = 'magma', limits = c(0,1),labels=scales::percent) +
  facet_grid(~kmer_size, labeller = kmer_labeller) +
  coord_equal() +
  xlab('Data in training images (Mb)') +
  ylab('Data in validation images (Mb)') +
  theme_few(base_size = 6)
`summarise()` has grouped output by 'kmer_size', 'bp_training'. You can override using the `.groups` argument.
p


# Output folder for the figures; stay quiet if it already exists from an earlier run
dir.create('paper_images', showWarnings = FALSE)
Warning: 'paper_images' already exists
ggsave(filename = 'kmerlen_vs_accuracy.png',plot =p,device='png',path = 'paper_images',width = 22,height = 5,units = 'cm',dpi = 2400)
# Reference value for each histogram panel (bp_training x kmer_size), restricted
# to the listed training and validation data amounts.
# NOTE: despite the name `means`, the statistic stored in `Int` is the median.
means = df %>%
  filter(bp_training %in% c('0.5','1','200','all')) %>%
  #filter(bp_valid %in% c('50','100','200','all')) %>%
  filter(bp_valid %in% c('2','5','10','20','50','100','200')) %>%
  group_by(bp_training,kmer_size) %>%
  summarise(Int=median(valid_acc))
`summarise()` has grouped output by 'bp_training'. You can override using the `.groups` argument.
# Histograms of validation accuracy for the selected training and validation
# data amounts: rows are kmer sizes, columns are training data amounts.
# The vertical line in each panel is the value stored in `means`.
# (alternative validation subset: c('50','100','200','all'))
df %>%
  filter(bp_training %in% c('0.5','1','200','all'),
         bp_valid %in% c('2','5','10','20','50','100','200')) %>%
  ggplot(aes(x = valid_acc)) +
  geom_histogram() +
  geom_vline(data = means, aes(xintercept = Int)) +
  facet_grid(kmer_size ~ bp_training)

So it seems that the smallest kmer sizes never result in very high accuracy, and the largest kmer sizes result in high accuracy for higher amounts of data, but lower accuracy for lower amounts. It seems that a kmer size of 7 is a good balance, and that training using images of different sizes helps in being more robust to the amount of data used to produce validation images.

As little as 1Mb produces moderately accurate results for kmer size 7 or below.

Can we quantify what is different about images produced with different data amounts? It seems there is larger variation in pixel intensities, probably because of random fluctuations:

# Paths of all PNG images under images_5 ... images_9, searched recursively and
# returned in folder order. The pattern is a regular expression: the dot is
# escaped and the match anchored, so only names ending in '.png' are kept
# (the bare '.png' also matched e.g. 'xpng' anywhere in a file name).
images = unlist(lapply(
  paste0('images_', 5:9),
  list.files,
  pattern = '\\.png$', recursive = TRUE, full.names = TRUE
))

# Number of distinct k-mers of length k when a k-mer and its reverse complement
# are counted once. Vectorised over k.
# Formula from https://bioinfologics.github.io/post/2018/09/17/k-mer-counting-part-i-introduction/
nkmers = function(k){
  all_kmers = 4^k
  # (1 - k %% 2) is 1 for even k and 0 for odd k, so the extra term only
  # contributes when k is even
  even_term = (1 - k %% 2) * 4^(k / 2)
  (all_kmers + even_term) / 2
}

# Summarise the spread of pixel-value counts for one image.
#
# path: path to a PNG; the regular expressions below expect the shape
#       '<prefix>_<k>/<taxon>+<S-number>_<8 digits>K.png'.
# Returns a one-row data.frame with columns k, taxon, sample, Mbp, sd_counts.
get_sd = function(path){
  # All metadata is parsed from the path. k may have more than one digit.
  k = as.integer(gsub('.+_([0-9]+)/.+','\\1', path))
  taxon = gsub('.+/(.+)\\+.+','\\1', path)
  sample = gsub('.+\\+(S-[0-9]+)_.+','\\1', path)
  # The 8-digit field is followed by 'K' in the file name; dividing by 1000
  # converts it to Mbp (presumably the field is in Kbp -- confirm)
  Mbp = as.integer(gsub('.+_([0-9]{8})K.+','\\1', path)) / 1000

  # Keep only the nkmers(k) largest pixel values, then take the standard
  # deviation of how often each distinct value occurs among them.
  # NOTE(review): presumably the remaining (smallest) values are background
  # pixels that do not encode a k-mer -- confirm.
  x = tail(sort(png::readPNG(path)), nkmers(k))
  sd_counts = sd(table(x))

  data.frame(k = k, taxon = taxon, sample = sample, Mbp = Mbp, sd_counts = sd_counts)
}

# Run get_sd() over all images on 4 background R sessions, row-binding the
# per-image results into df, then switch back to sequential evaluation.
plan(multisession, workers = 4)
df = furrr::future_map_dfr(images, get_sd)
plan(sequential)


df
# sd_counts against the amount of data, one line per sample (legend hidden),
# both axes on a log10 scale, one panel per k with free axis ranges.
ggplot(df, aes(x = Mbp, y = sd_counts, color = sample)) +
  geom_line() +
  scale_x_log10() +
  scale_y_log10() +
  scale_color_discrete(guide = 'none') +
  facet_wrap(as.factor(k)~., scales = 'free')

Training parameters

Now we will check the results of using different training parameters:

- model pretraining
- augmentation (CutMix or MixUp)
- label smoothing
- model architecture
- lighting transformations

Let’s read the data and prepare for plotting:

# Load the training-parameter results (dropping the first, index column) and
# derive the columns used for plotting:
#   bp_valid        - validation data amount in Mb, as an ordered factor
#   augmentation    - 'CutMix', 'MixUp' or 'None', detected from `callback`
#   aug, lablsmth, pretr, transformations
#                   - text tag for each option, empty string when it is off
#   parameters      - comma-separated combination of architecture and tags,
#                     with levels ordered by mean validation accuracy
df = read_csv('training_params.csv')[-1] %>%
  mutate(bp_valid = factor(as.character(as.integer(bp_valid)/1e6), 
                           levels = sort(unique(bp_valid/1e6)), 
                           ordered = TRUE),
         augmentation = ifelse(str_detect(callback,'CutMix'),'CutMix',
                               ifelse(str_detect(callback,'MixUp'),'MixUp',
                                      'None')
                               ),
         augmentation = factor(augmentation, levels = c('None','MixUp','CutMix'),ordered = F),
         aug = str_replace(augmentation,'None',''),
         lablsmth= ifelse(label_smoothing,
                                  'label Smoothing',
                                  ''),
         pretr = ifelse(pretrained,
                             'pretrained',
                             ''
                             ),
         transformations = ifelse(trans,
                             'with_transforms',
                             ''
                             ),
         # join the tags, then collapse repeated commas, strip leading/trailing
         # commas, and call a completely empty combination 'None'
         parameters = paste(arch,pretr,lablsmth,aug,transformations,sep=',') %>%
           str_replace_all(',{2,}',',') %>%
           str_remove_all('^,|,$') %>%
           str_replace_all('^$','None') %>%
           fct_reorder(valid_acc, mean)
  )
New names:Rows: 30240 Columns: 16── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): bp_training, samples_training, samples_valid, callback, arch
dbl (8): ...1, kmer_size, replicate, bp_valid, n_samp_training, n_samp_valid, valid_loss, valid_acc
lgl (3): label_smoothing, pretrained, trans
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df 
NA
NA
NA

Now we can plot the effect of parameters. There are clearly some models that do much better than others:

# Every row of df as one jittered point: x = parameter combination (levels are
# ordered by mean validation accuracy), y = validation accuracy,
# colour = data amount in the validation images.
ggplot(df, aes(x = parameters, y = valid_acc)) +
  #geom_boxplot() +
  #geom_violin(adjust=1.5) +
  geom_jitter(aes(color = bp_valid),height = 0.005) +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9) +
  #facet_wrap(~bp_valid) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Let’s look at the top 20 models:

# Same plot restricted to the last 20 levels of `parameters`; because the levels
# are sorted by increasing mean validation accuracy, these are the 20 best.
ggplot(filter(df, parameters %in% tail(levels(df$parameters),20)), aes(x = parameters, y = valid_acc)) +
  #geom_boxplot() +
  #geom_violin(adjust=1.5) +
  geom_jitter(aes(color = bp_valid),height = 0.005) +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9) +
  #facet_wrap(~bp_valid) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

Let’s plot by architecture:

# Validation accuracy by model architecture (architectures sorted by accuracy).
# Points are individual rows of df coloured by validation data amount; the
# crossbars mark the mean for each colour group.
p = ggplot(mutate(df, arch = fct_reorder(arch,valid_acc)), 
       aes(x = arch, y = valid_acc, color=bp_valid)) +
  #geom_boxplot() +
  #geom_violin(adjust=1.5) +
  geom_jitter(aes(color = bp_valid),height = 0.005, size = 0.1, alpha = 0.1, shape = 16) +
  # line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0
  stat_summary(fun = mean, geom = 'crossbar', linewidth = 0.05, show.legend=FALSE) +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9, name = 'Mbp in validation\nimages', 
                        guide = guide_legend(override.aes = list(alpha = 1))) +
  scale_y_continuous(labels = scales::percent, name = 'Validation Accuracy') +
  xlab('Model architecture') +
  #facet_wrap(~bp_valid) +
  theme_few(base_size = 6) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45),
        legend.key.size = unit(0.2, "cm"))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
Please use `linewidth` instead.
p 

ggsave(filename = 'architecture.png',plot =p,device='png',path = 'paper_images',width = 5,height = 5,units = 'cm',dpi = 2400)

Now by pretrained:

# Validation accuracy for pre-trained vs randomly initialised models.
# The axis labels are built from the logical `pretrained` column itself, so each
# label stays attached to its group however fct_reorder() sorts the levels.
# Positional labels would silently swap if the accuracy ranking changed.
p = ggplot(mutate(df, pretr = fct_reorder(if_else(pretrained, 'pre-trained', 'random'), valid_acc)), 
       aes(x = pretr, y = valid_acc, color=bp_valid)) +
  #geom_boxplot() +
  #geom_violin() +
  geom_jitter(aes(color = bp_valid),height = 0.005, size = 0.05, alpha = 0.1, shape = 16) +
  # line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0
  stat_summary(fun = mean, geom = 'crossbar', linewidth = 0.05) +
  scale_x_discrete(name = 'Model pretraining') +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9, name = 'Mbp in validation\nimages') +
  scale_y_continuous(labels = scales::percent, name = 'Validation Accuracy') +
  #facet_wrap(~bp_valid) +
  theme_few(base_size = 6) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45),
        legend.position = 'none'
        )

p

ggsave(filename = 'pretraining.png',plot =p,device='png',path = 'paper_images',width = 3,height = 5,units = 'cm',dpi = 2400)

Now by label smoothing:

# Validation accuracy with and without label smoothing.
# 'Yes'/'No' come directly from the logical `label_smoothing` column, so the
# labels cannot be swapped by the accuracy-based ordering of fct_reorder().
p = ggplot(mutate(df, lablsmth = fct_reorder(if_else(label_smoothing, 'Yes', 'No'), valid_acc)), 
         aes(x = lablsmth, y = valid_acc, color=bp_valid)) +
  #geom_boxplot() +
  #geom_violin(adjust=1.5) +
  geom_jitter(aes(color = bp_valid),height = 0.005, size = 0.05, alpha = 0.1, shape = 16) +
  # line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0
  stat_summary(fun = mean, geom = 'crossbar', linewidth = 0.05) +
  scale_x_discrete(name = 'Label smoothing') +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9, name = 'Mbp in validation\nimages') +
  scale_y_continuous(labels = scales::percent, name = 'Validation Accuracy') +
  #facet_wrap(~bp_valid) +
  theme_few(base_size = 6) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45),
        legend.position = 'none'
        )

p
ggsave(filename = 'labelsmoothing.png',plot =p,device='png',path = 'paper_images',width = 3,height = 5,units = 'cm',dpi = 2400)

Now by CutMix/MixUp augmentations:

# Validation accuracy by augmentation (None / MixUp / CutMix, sorted by
# accuracy). Points are individual rows of df; crossbars mark group means.
p = ggplot(mutate(df, augmentation = fct_reorder(augmentation,valid_acc)), 
       aes(x = augmentation, y = valid_acc, color = bp_valid)) +
  #geom_boxplot() +
  #geom_violin(adjust=1.5) +
  geom_jitter(aes(color = bp_valid),height = 0.005, size = 0.05, alpha = 0.1, shape = 16) +
  # line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0
  stat_summary(fun = mean, geom = 'crossbar', linewidth = 0.05) +
  scale_x_discrete(name = 'Augmentation') +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9, name = 'Mbp in validation\nimages') +
  scale_y_continuous(labels = scales::percent, name = 'Validation Accuracy') +
  theme_few(base_size = 6) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45),
        legend.position = 'none'
        )

p
ggsave(filename = 'augmentations.png',plot =p,device='png',path = 'paper_images',width = 3,height = 5,units = 'cm',dpi = 2400)

Finally, by lighting transforms:

# Validation accuracy with and without lighting transforms.
# 'Yes'/'No' come directly from the logical `trans` column, so the labels
# cannot be swapped by the accuracy-based ordering of fct_reorder().
p = ggplot(mutate(df, transformations = fct_reorder(if_else(trans, 'Yes', 'No'), valid_acc)), 
       aes(x = transformations, y = valid_acc, color=bp_valid)) +
  #geom_boxplot() +
  #geom_violin(adjust=1.5) +
  geom_jitter(aes(color = bp_valid),height = 0.005, size = 0.05, alpha = 0.1, shape = 16) +
  # line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0
  stat_summary(fun = mean, geom = 'crossbar', linewidth = 0.05) +
  scale_x_discrete(name = 'Lighting transforms') +
  scale_color_viridis_d(option='turbo',begin = 0.1, end=0.9, name = 'Mbp in validation\nimages') +
  scale_y_continuous(labels = scales::percent, name = 'Validation Accuracy') +
  theme_few(base_size = 6) +
  theme(axis.text.x = element_text(hjust = 1, angle = 45),
        legend.position = 'none'
        )

p
ggsave(filename = 'lighting.png',plot =p,device='png',path = 'paper_images',width = 3,height = 5,units = 'cm',dpi = 2400)

Let’s try a linear model to check which combination is best:

# Linear model of transformed validation accuracy with all main effects and
# interactions of the training options and the validation data amount.
# bp_valid is an ordered factor, so lm() codes it with polynomial contrasts
# (the bp_valid.L, .Q, .C, ^4 ... terms in the printed coefficients).
# NOTE(review): the usual arcsine transform for proportions is asin(sqrt(p));
# here asin() is applied to valid_acc directly -- confirm this is intended.
full_model = lm(asin(valid_acc)~arch*trans*pretrained*augmentation*label_smoothing*bp_valid, data = df)
# standard lm diagnostic plots
plot(full_model)

# Forward selection: start from the intercept-only model and let step() add
# terms (by AIC, its default criterion) up to the full interaction model.
reduced_model = step(lm(asin(valid_acc)~1, data = df), 
                     scope = list(lower = formula(asin(valid_acc)~1), 
                                  upper = formula(asin(valid_acc)~arch*trans*pretrained*augmentation*label_smoothing*bp_valid)
                                  ),
                     direction = 'forward')

The best model is quite complex with some interactions

reduced_model

Call:
lm(formula = asin(valid_acc) ~ bp_valid + pretrained + arch + 
    augmentation + label_smoothing + trans + pretrained:arch + 
    bp_valid:pretrained + bp_valid:arch + arch:augmentation + 
    bp_valid:augmentation + pretrained:augmentation + augmentation:label_smoothing + 
    pretrained:label_smoothing + arch:trans + augmentation:trans + 
    bp_valid:pretrained:arch + pretrained:arch:augmentation + 
    pretrained:augmentation:label_smoothing + arch:augmentation:trans + 
    bp_valid:pretrained:augmentation, data = df)

Coefficients:
                                              (Intercept)  
                                                6.561e-01  
                                               bp_valid.L  
                                                3.114e-01  
                                               bp_valid.Q  
                                               -1.833e-01  
                                               bp_valid.C  
                                                5.212e-02  
                                               bp_valid^4  
                                                1.349e-02  
                                               bp_valid^5  
                                               -2.865e-02  
                                               bp_valid^6  
                                                1.188e-02  
                                               bp_valid^7  
                                                9.771e-03  
                                               bp_valid^8  
                                               -5.580e-03  
                                           pretrainedTRUE  
                                                6.445e-02  
                                  archig_resnext101_32x8d  
                                                4.786e-01  
                                           archresnet101d  
                                                3.779e-01  
                                            archresnet18d  
                                                3.050e-01  
                                             archresnet50  
                                                4.784e-01  
                                            archresnet50d  
                                                3.871e-01  
                                      archwide_resnet50_2  
                                                5.112e-01  
                                        augmentationMixUp  
                                                6.400e-02  
                                       augmentationCutMix  
                                               -2.131e-02  
                                      label_smoothingTRUE  
                                                3.604e-02  
                                                transTRUE  
                                                1.098e-02  
                   pretrainedTRUE:archig_resnext101_32x8d  
                                               -3.675e-01  
                            pretrainedTRUE:archresnet101d  
                                               -3.555e-01  
                             pretrainedTRUE:archresnet18d  
                                               -2.542e-01  
                              pretrainedTRUE:archresnet50  
                                               -3.452e-01  
                             pretrainedTRUE:archresnet50d  
                                               -3.491e-01  
                       pretrainedTRUE:archwide_resnet50_2  
                                               -4.532e-01  
                                bp_valid.L:pretrainedTRUE  
                                                2.358e-01  
                                bp_valid.Q:pretrainedTRUE  
                                               -4.133e-02  
                                bp_valid.C:pretrainedTRUE  
                                               -5.603e-02  
                                bp_valid^4:pretrainedTRUE  
                                                4.519e-02  
                                bp_valid^5:pretrainedTRUE  
                                               -1.260e-02  
                                bp_valid^6:pretrainedTRUE  
                                               -1.502e-02  
                                bp_valid^7:pretrainedTRUE  
                                                5.130e-03  
                                bp_valid^8:pretrainedTRUE  
                                                3.372e-03  
                       bp_valid.L:archig_resnext101_32x8d  
                                               -2.926e-02  
                       bp_valid.Q:archig_resnext101_32x8d  
                                               -8.490e-03  
                       bp_valid.C:archig_resnext101_32x8d  
                                                1.977e-02  
                       bp_valid^4:archig_resnext101_32x8d  
                                               -8.721e-03  
                       bp_valid^5:archig_resnext101_32x8d  
                                               -2.483e-03  
                       bp_valid^6:archig_resnext101_32x8d  
                                                8.711e-03  
                       bp_valid^7:archig_resnext101_32x8d  
                                               -1.260e-02  
                       bp_valid^8:archig_resnext101_32x8d  
                                                2.109e-04  
                                bp_valid.L:archresnet101d  
                                               -1.653e-01  
                                bp_valid.Q:archresnet101d  
                                                7.381e-02  
                                bp_valid.C:archresnet101d  
                                                9.417e-04  
                                bp_valid^4:archresnet101d  
                                               -2.948e-02  
                                bp_valid^5:archresnet101d  
                                                2.877e-02  
                                bp_valid^6:archresnet101d  
                                               -1.376e-02  
                                bp_valid^7:archresnet101d  
                                               -5.532e-03  
                                bp_valid^8:archresnet101d  
                                               -4.737e-03  
                                 bp_valid.L:archresnet18d  
                                               -1.193e-01  
                                 bp_valid.Q:archresnet18d  
                                                5.106e-02  
                                 bp_valid.C:archresnet18d  
                                                1.054e-02  
                                 bp_valid^4:archresnet18d  
                                               -3.777e-02  
                                 bp_valid^5:archresnet18d  
                                                2.669e-02  
                                 bp_valid^6:archresnet18d  
                                               -5.637e-03  
                                 bp_valid^7:archresnet18d  
                                               -1.105e-02  
                                 bp_valid^8:archresnet18d  
                                                1.530e-03  
                                  bp_valid.L:archresnet50  
                                               -4.650e-02  
                                  bp_valid.Q:archresnet50  
                                               -6.761e-03  
                                  bp_valid.C:archresnet50  
                                                2.939e-02  
                                  bp_valid^4:archresnet50  
                                               -1.920e-02  
                                  bp_valid^5:archresnet50  
                                                2.184e-03  
                                  bp_valid^6:archresnet50  
                                                1.335e-02  
                                  bp_valid^7:archresnet50  
                                               -1.706e-02  
                                  bp_valid^8:archresnet50  
                                                2.617e-03  
                                 bp_valid.L:archresnet50d  
                                               -1.553e-01  
                                 bp_valid.Q:archresnet50d  
                                                7.842e-02  
                                 bp_valid.C:archresnet50d  
                                               -4.425e-03  
                                 bp_valid^4:archresnet50d  
                                               -2.616e-02  
                                 bp_valid^5:archresnet50d  
                                                2.930e-02  
                                 bp_valid^6:archresnet50d  
                                               -1.431e-02  
                                 bp_valid^7:archresnet50d  
                                               -3.372e-03  
                                 bp_valid^8:archresnet50d  
                                               -5.996e-03  
                           bp_valid.L:archwide_resnet50_2  
                                               -3.658e-02  
                           bp_valid.Q:archwide_resnet50_2  
                                                1.171e-03  
                           bp_valid.C:archwide_resnet50_2  
                                                1.877e-02  
                           bp_valid^4:archwide_resnet50_2  
                                               -1.304e-02  
                           bp_valid^5:archwide_resnet50_2  
                                               -1.398e-02  
                           bp_valid^6:archwide_resnet50_2  
                                                2.189e-02  
                           bp_valid^7:archwide_resnet50_2  
                                               -1.898e-02  
                           bp_valid^8:archwide_resnet50_2  
                                               -9.812e-04  
                archig_resnext101_32x8d:augmentationMixUp  
                                               -1.766e-02  
                         archresnet101d:augmentationMixUp  
                                                5.130e-02  
                          archresnet18d:augmentationMixUp  
                                               -9.741e-03  
                           archresnet50:augmentationMixUp  
                                               -2.336e-02  
                          archresnet50d:augmentationMixUp  
                                                2.380e-02  
                    archwide_resnet50_2:augmentationMixUp  
                                               -5.815e-02  
               archig_resnext101_32x8d:augmentationCutMix  
                                                9.295e-02  
                        archresnet101d:augmentationCutMix  
                                                1.288e-01  
                         archresnet18d:augmentationCutMix  
                                                6.007e-02  
                          archresnet50:augmentationCutMix  
                                                7.041e-02  
                         archresnet50d:augmentationCutMix  
                                                1.017e-01  
                   archwide_resnet50_2:augmentationCutMix  
                                                3.642e-02  
                             bp_valid.L:augmentationMixUp  
                                                3.908e-03  
                             bp_valid.Q:augmentationMixUp  
                                               -6.958e-03  
                             bp_valid.C:augmentationMixUp  
                                                4.087e-03  
                             bp_valid^4:augmentationMixUp  
                                                2.047e-04  
                             bp_valid^5:augmentationMixUp  
                                                5.062e-03  
                             bp_valid^6:augmentationMixUp  
                                               -4.185e-03  
                             bp_valid^7:augmentationMixUp  
                                                2.057e-03  
                             bp_valid^8:augmentationMixUp  
                                                4.019e-03  
                            bp_valid.L:augmentationCutMix  
                                               -1.606e-02  
                            bp_valid.Q:augmentationCutMix  
                                                4.800e-03  
                            bp_valid.C:augmentationCutMix  
                                                7.649e-05  
                            bp_valid^4:augmentationCutMix  
                                               -1.092e-04  
                            bp_valid^5:augmentationCutMix  
                                                1.038e-02  
                            bp_valid^6:augmentationCutMix  
                                               -7.533e-03  
                            bp_valid^7:augmentationCutMix  
                                                2.572e-03  
                            bp_valid^8:augmentationCutMix  
                                                4.894e-03  
                         pretrainedTRUE:augmentationMixUp  
                                               -5.293e-02  
                        pretrainedTRUE:augmentationCutMix  
                                                2.794e-02  
                    augmentationMixUp:label_smoothingTRUE  
                                               -5.032e-02  
                   augmentationCutMix:label_smoothingTRUE  
                                               -5.159e-02  
                       pretrainedTRUE:label_smoothingTRUE  
                                               -5.352e-02  
                        archig_resnext101_32x8d:transTRUE  
                                                1.860e-03  
                                 archresnet101d:transTRUE  
                                                1.813e-03  
                                  archresnet18d:transTRUE  
                                                1.203e-03  
                                   archresnet50:transTRUE  
                                               -9.509e-03  
                                  archresnet50d:transTRUE  
                                               -4.479e-03  
                            archwide_resnet50_2:transTRUE  
                                               -1.943e-02  
                              augmentationMixUp:transTRUE  
                                               -1.450e-02  
                             augmentationCutMix:transTRUE  
                                               -5.982e-03  
        bp_valid.L:pretrainedTRUE:archig_resnext101_32x8d  
                                               -2.637e-02  
        bp_valid.Q:pretrainedTRUE:archig_resnext101_32x8d  
                                               -2.021e-02  
        bp_valid.C:pretrainedTRUE:archig_resnext101_32x8d  
                                                1.837e-02  
        bp_valid^4:pretrainedTRUE:archig_resnext101_32x8d  
                                               -1.841e-02  
        bp_valid^5:pretrainedTRUE:archig_resnext101_32x8d  
                                                2.453e-02  
        bp_valid^6:pretrainedTRUE:archig_resnext101_32x8d  
                                               -9.192e-03  
        bp_valid^7:pretrainedTRUE:archig_resnext101_32x8d  
                                               -3.420e-03  
        bp_valid^8:pretrainedTRUE:archig_resnext101_32x8d  
                                               -1.808e-02  
                 bp_valid.L:pretrainedTRUE:archresnet101d  
                                                1.349e-01  
                 bp_valid.Q:pretrainedTRUE:archresnet101d  
                                               -7.756e-02  
                 bp_valid.C:pretrainedTRUE:archresnet101d  
                                                3.039e-02  
                 bp_valid^4:pretrainedTRUE:archresnet101d  
                                                6.843e-03  
                 bp_valid^5:pretrainedTRUE:archresnet101d  
                                               -1.434e-02  
                 bp_valid^6:pretrainedTRUE:archresnet101d  
                                               -1.204e-03  
                 bp_valid^7:pretrainedTRUE:archresnet101d  
                                               -6.390e-03  
                 bp_valid^8:pretrainedTRUE:archresnet101d  
                                                1.045e-02  
                  bp_valid.L:pretrainedTRUE:archresnet18d  
                                                1.673e-01  
                  bp_valid.Q:pretrainedTRUE:archresnet18d  
                                               -7.756e-02  
                  bp_valid.C:pretrainedTRUE:archresnet18d  
                                                2.741e-02  
                  bp_valid^4:pretrainedTRUE:archresnet18d  
                                                7.421e-04  
                  bp_valid^5:pretrainedTRUE:archresnet18d  
                                               -6.923e-04  
                  bp_valid^6:pretrainedTRUE:archresnet18d  
                                               -1.453e-02  
                  bp_valid^7:pretrainedTRUE:archresnet18d  
                                                8.352e-03  
                  bp_valid^8:pretrainedTRUE:archresnet18d  
                                               -4.905e-03  
                   bp_valid.L:pretrainedTRUE:archresnet50  
                                               -1.594e-02  
                   bp_valid.Q:pretrainedTRUE:archresnet50  
                                                9.914e-03  
                   bp_valid.C:pretrainedTRUE:archresnet50  
                                               -1.984e-02  
                   bp_valid^4:pretrainedTRUE:archresnet50  
                                               -1.313e-02  
                   bp_valid^5:pretrainedTRUE:archresnet50  
                                                3.741e-02  
                   bp_valid^6:pretrainedTRUE:archresnet50  
                                                5.299e-04  
                   bp_valid^7:pretrainedTRUE:archresnet50  
                                               -3.754e-03  
                   bp_valid^8:pretrainedTRUE:archresnet50  
                                               -1.681e-03  
                  bp_valid.L:pretrainedTRUE:archresnet50d  
                                                1.205e-01  
                  bp_valid.Q:pretrainedTRUE:archresnet50d  
                                               -1.114e-01  
                  bp_valid.C:pretrainedTRUE:archresnet50d  
                                                4.579e-02  
                  bp_valid^4:pretrainedTRUE:archresnet50d  
                                                9.102e-03  
                  bp_valid^5:pretrainedTRUE:archresnet50d  
                                               -3.293e-02  
                  bp_valid^6:pretrainedTRUE:archresnet50d  
                                                9.293e-03  
                  bp_valid^7:pretrainedTRUE:archresnet50d  
                                                2.861e-02  
                  bp_valid^8:pretrainedTRUE:archresnet50d  
                                                8.449e-03  
            bp_valid.L:pretrainedTRUE:archwide_resnet50_2  
                                               -2.731e-02  
            bp_valid.Q:pretrainedTRUE:archwide_resnet50_2  
                                                2.152e-02  
            bp_valid.C:pretrainedTRUE:archwide_resnet50_2  
                                               -1.329e-02  
            bp_valid^4:pretrainedTRUE:archwide_resnet50_2  
                                               -2.174e-02  
            bp_valid^5:pretrainedTRUE:archwide_resnet50_2  
                                                5.888e-02  
            bp_valid^6:pretrainedTRUE:archwide_resnet50_2  
                                               -4.246e-02  
            bp_valid^7:pretrainedTRUE:archwide_resnet50_2  
                                                3.718e-02  
            bp_valid^8:pretrainedTRUE:archwide_resnet50_2  
                                               -3.049e-02  
 pretrainedTRUE:archig_resnext101_32x8d:augmentationMixUp  
                                                6.789e-02  
          pretrainedTRUE:archresnet101d:augmentationMixUp  
                                               -3.619e-02  
           pretrainedTRUE:archresnet18d:augmentationMixUp  
                                               -3.443e-03  
            pretrainedTRUE:archresnet50:augmentationMixUp  
                                                2.548e-02  
           pretrainedTRUE:archresnet50d:augmentationMixUp  
                                                5.615e-04  
     pretrainedTRUE:archwide_resnet50_2:augmentationMixUp  
                                                4.066e-02  
pretrainedTRUE:archig_resnext101_32x8d:augmentationCutMix  
                                               -9.468e-02  
         pretrainedTRUE:archresnet101d:augmentationCutMix  
                                               -1.122e-01  
          pretrainedTRUE:archresnet18d:augmentationCutMix  
                                               -8.547e-02  
           pretrainedTRUE:archresnet50:augmentationCutMix  
                                               -8.759e-02  
          pretrainedTRUE:archresnet50d:augmentationCutMix  
                                               -6.982e-02  
    pretrainedTRUE:archwide_resnet50_2:augmentationCutMix  
                                               -6.689e-02  
     pretrainedTRUE:augmentationMixUp:label_smoothingTRUE  
                                                5.111e-02  
    pretrainedTRUE:augmentationCutMix:label_smoothingTRUE  
                                                5.191e-02  
      archig_resnext101_32x8d:augmentationMixUp:transTRUE  
                                               -5.656e-03  
               archresnet101d:augmentationMixUp:transTRUE  
                                                2.068e-02  
                archresnet18d:augmentationMixUp:transTRUE  
                                                1.244e-02  
                 archresnet50:augmentationMixUp:transTRUE  
                                                9.801e-03  
                archresnet50d:augmentationMixUp:transTRUE  
                                               -9.574e-04  
          archwide_resnet50_2:augmentationMixUp:transTRUE  
                                                1.794e-02  
     archig_resnext101_32x8d:augmentationCutMix:transTRUE  
                                                8.469e-03  
              archresnet101d:augmentationCutMix:transTRUE  
                                               -8.739e-03  
               archresnet18d:augmentationCutMix:transTRUE  
                                               -2.055e-02  
                archresnet50:augmentationCutMix:transTRUE  
                                                1.405e-02  
               archresnet50d:augmentationCutMix:transTRUE  
                                               -1.698e-04  
         archwide_resnet50_2:augmentationCutMix:transTRUE  
                                                2.349e-03  
              bp_valid.L:pretrainedTRUE:augmentationMixUp  
                                               -5.862e-03  
              bp_valid.Q:pretrainedTRUE:augmentationMixUp  
                                               -9.938e-03  
              bp_valid.C:pretrainedTRUE:augmentationMixUp  
                                                1.798e-02  
              bp_valid^4:pretrainedTRUE:augmentationMixUp  
                                                5.561e-04  
              bp_valid^5:pretrainedTRUE:augmentationMixUp  
                                               -7.104e-03  
              bp_valid^6:pretrainedTRUE:augmentationMixUp  
                                                2.117e-03  
              bp_valid^7:pretrainedTRUE:augmentationMixUp  
                                               -4.217e-03  
              bp_valid^8:pretrainedTRUE:augmentationMixUp  
                                                1.170e-03  
             bp_valid.L:pretrainedTRUE:augmentationCutMix  
                                               -4.333e-02  
             bp_valid.Q:pretrainedTRUE:augmentationCutMix  
                                                1.675e-03  
             bp_valid.C:pretrainedTRUE:augmentationCutMix  
                                                2.397e-02  
             bp_valid^4:pretrainedTRUE:augmentationCutMix  
                                               -7.490e-03  
             bp_valid^5:pretrainedTRUE:augmentationCutMix  
                                               -1.199e-02  
             bp_valid^6:pretrainedTRUE:augmentationCutMix  
                                                1.557e-02  
             bp_valid^7:pretrainedTRUE:augmentationCutMix  
                                               -2.549e-03  
             bp_valid^8:pretrainedTRUE:augmentationCutMix  
                                               -3.646e-03  

Let’s now look at model predictions to get a better sense. We can see a few things:

# Collect every combination of training options and validation data amount
# that occurs in `df`, attach the accuracy predicted by `reduced_model`, and
# list the combinations from best to worst for each value of bp_valid.
predictions <- df %>%
  select(trans, arch, pretrained, label_smoothing, augmentation, bp_valid) %>%
  distinct()

# NOTE(review): sin() presumably back-transforms a response that was
# arcsine-transformed when the model was fitted -- confirm against the model.
predictions$predicted_acc <- sin(predict(reduced_model, predictions))

predictions <- arrange(predictions, desc(predicted_acc))

split(predictions, predictions$bp_valid)
$`0.5`

$`1`

$`2`

$`5`

$`10`

$`20`

$`50`

$`100`

$`200`
NA

Effect of sample quality

Now that we optimized training parameters, let’s evaluate the effect of sample quality. To do that, we did training using only 5 randomly chosen samples as training set, including 0-3 of the four lowest-quality samples per species. Quality was evaluated using two metrics: insert size or increase in T content throughout read length. We then evaluated, for each of the 5 samples per species left out of the training set, whether its prediction was correct.

We did 50 replicates, randomly choosing the training set for each combination of quality metric and number of low-quality samples in the training set. Let’s now evaluate the results, starting by reading the data.

# Load the sample-quality results, dropping the first (row index) column.
df <- read_csv('sample_quality.csv') %>%
  select(-1)
New names:Rows: 93418 Columns: 13── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): bp_training, samples_training, qual_metric, sample_valid, valid_actual, valid_prediction
dbl (6): ...1, kmer_size, replicate, bp_valid, n_samp_training, n_lowqual_training
lgl (1): valid_lowqual
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Flag each validation call as correct when the predicted species equals the
# actual species, then display the table.
df <- mutate(df, correct_pred = valid_prediction == valid_actual)

df

It seems that in general including some low quality samples (by the variation in content metric) may improve high-quality samples a little bit, but only increases variation of low quality samples instead of clearly improving them.

# Histogram, across replicates, of mean validation accuracy for each number of
# low-quality samples in the training set (facet rows) and for low- versus
# high-quality validation samples (facet columns), with quality defined by the
# 'high_c_sd' metric.
# Each validation sample is scored on the image with the largest bp_valid.
# This is the same rule the insert-size histogram and the content_sd scatter
# plot in this notebook apply, so the figures can be compared directly.
p <- df %>%
  filter(qual_metric == 'high_c_sd') %>%
  group_by(replicate, sample_valid, n_lowqual_training) %>%
  filter(bp_valid == max(bp_valid)) %>%
  group_by(replicate, n_lowqual_training, valid_lowqual) %>%
  summarize(mean_acc = mean(correct_pred)) %>%
  # Replace the logical flag with the text shown in the facet column strips.
  mutate(valid_lowqual = c('TRUE' = 'Validation accuracy for low quality samples', 'FALSE' = 'Validation accuracy for high-quality samples')[as.character(valid_lowqual)]) %>%
  ggplot() +
  geom_histogram(aes(x = mean_acc), boundary = 1) +
  # Secondary y axis without breaks or labels: it only carries a title for
  # the facet rows.
  scale_y_continuous(sec.axis = sec_axis('identity', name = 'Number of low quality samples in training set', breaks = NULL, labels = NULL, guide = NULL)) +
  scale_x_continuous(limits = c(0, 1)) +
  xlab('Average validation accuracy across all samples') +
  ylab('Frequency across replicates') +
  labs(title = 'Effect of quality determined by variation in GC content on accuracy') +
  facet_grid(n_lowqual_training ~ valid_lowqual) +
  theme_few() +
  theme(strip.background = element_rect(fill = gray(0.8)),
        plot.title = element_text(hjust = 0.5)
        )
`summarise()` has grouped output by 'replicate', 'n_lowqual_training'. You can override using the `.groups` argument.
# Show the figure in the notebook, then export PDF and PNG copies.
p

walk2(
  c('quality_content.pdf', 'quality_content.png'),
  c('pdf', 'png'),
  function(out_file, out_device) {
    ggsave(filename = out_file, plot = p, device = out_device,
           path = 'paper_images', width = 7, height = 5, units = 'in')
  }
)

NA
NA
NA
NA

The effect is less pronounced for average insert size

# Histogram, across replicates, of mean validation accuracy when low-quality
# samples are defined by the 'low_size' metric. Rows are the number of
# low-quality samples in the training set; columns separate low- and
# high-quality validation samples. Only the image with the largest bp_valid is
# kept for each validation sample.
p <- df %>%
  filter(qual_metric == 'low_size') %>%
  group_by(replicate, sample_valid, n_lowqual_training) %>%
  filter(bp_valid == max(bp_valid)) %>%
  group_by(replicate, n_lowqual_training, valid_lowqual) %>%
  summarize(mean_acc = mean(correct_pred)) %>%
  mutate(
    valid_lowqual = if_else(
      valid_lowqual,
      'Validation accuracy for low quality samples',
      'Validation accuracy for high-quality samples'
    )
  ) %>%
  ggplot(aes(x = mean_acc)) +
  geom_histogram(boundary = 1) +
  scale_x_continuous(limits = c(0, 1)) +
  # Secondary y axis without breaks or labels: it only carries a title for
  # the facet rows.
  scale_y_continuous(
    sec.axis = sec_axis(
      'identity',
      name = 'Number of low quality samples in training set',
      breaks = NULL,
      labels = NULL,
      guide = NULL
    )
  ) +
  labs(
    x = 'Average validation accuracy across all samples',
    y = 'Frequency across replicates',
    title = 'Effect of sequencing quality determined by insert size on accuracy'
  ) +
  facet_grid(n_lowqual_training ~ valid_lowqual) +
  theme_few() +
  theme(
    strip.background = element_rect(fill = gray(0.8)),
    plot.title = element_text(hjust = 0.5)
  )
`summarise()` has grouped output by 'replicate', 'n_lowqual_training'. You can override using the `.groups` argument.
# Show the figure in the notebook, then export PDF and PNG copies.
p

walk2(
  c('quality_size.pdf', 'quality_size.png'),
  c('pdf', 'png'),
  function(out_file, out_device) {
    ggsave(filename = out_file, plot = p, device = out_device,
           path = 'paper_images', width = 7, height = 5, units = 'in')
  }
)

What if we order all samples by their validation accuracy and compare to the quality metrics, what do we see?

# Load per-sample metadata and quality statistics, dropping the first
# (row index) column.
df_info <- read_csv('sample_info_stats.csv') %>%
  select(-1)
New names:Rows: 100 Columns: 11── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): species, collector, collection, country, dna_concentration, library_id, filename_root
dbl (4): ...1, sample_number, insert_size, content_sd
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Display the sample metadata table.
df_info

There seems to be a weak negative correlation between variation in content and accuracy, but many samples that seem to be good by this metric always have low accuracy.

# Per-sample mean validation accuracy against content_sd, one panel per number
# of low-quality training samples, coloured by species. Each validation sample
# is scored on its image with the largest bp_valid.
df %>%
  filter(qual_metric == 'high_c_sd') %>%
  group_by(replicate, sample_valid, n_lowqual_training) %>%
  filter(bp_valid == max(bp_valid)) %>%
  group_by(sample_valid, n_lowqual_training) %>%
  summarize(mean_acc = mean(correct_pred)) %>%
  left_join(df_info, by = c('sample_valid' = 'library_id')) %>%
  ggplot(aes(x = content_sd, y = mean_acc, color = species)) +
  scale_x_sqrt() +
  # Jitter vertically only, so overlapping accuracies stay distinguishable.
  geom_point(position = position_jitter(width = 0, height = 0.05)) +
  scale_color_viridis_d(option = 'turbo') +
  facet_wrap(~n_lowqual_training)
`summarise()` has grouped output by 'sample_valid'. You can override using the `.groups` argument.

Again, this is less pronounced for insert size

# Per-sample mean validation accuracy against insert size, one panel per
# number of low-quality training samples, coloured by species.
# Each validation sample is scored on the image with the largest bp_valid.
# This is the same rule the content_sd scatter plot and the insert-size
# histogram in this notebook apply, so the panels can be compared directly.
df %>%
  filter(qual_metric == 'low_size') %>%
  group_by(replicate, sample_valid, n_lowqual_training) %>%
  filter(bp_valid == max(bp_valid)) %>%
  group_by(sample_valid, n_lowqual_training) %>%
  summarize(mean_acc = mean(correct_pred)) %>%
  left_join(df_info, by = c('sample_valid' = 'library_id')) %>%
  ggplot() +
  # Jitter vertically only, so overlapping accuracies stay distinguishable.
  geom_jitter(aes(x = insert_size, y = mean_acc, color = species), width = 0, height = 0.05) +
  scale_color_viridis_d(option = 'turbo') +
  facet_wrap(~n_lowqual_training)
`summarise()` has grouped output by 'sample_valid'. You can override using the `.groups` argument.

What is the relationship between DNA extraction yield and library quality?

First, let’s plot against standard deviation.

# DNA yield against the standard deviation in base content, both on log axes.
# Readings recorded as 'too high' are drawn at 200 and zeros at 0.05 so that
# they fit on the log x axis; those two positions are labelled 'too high' and
# 'too low'.
p1 <- df_info %>%
  mutate(
    dna_c = if_else(dna_concentration == 'too high', '200', dna_concentration),
    dna_c = as.numeric(dna_c),
    dna_c = if_else(dna_c == 0, 0.05, dna_c)
  ) %>%
  ggplot(aes(x = dna_c, y = content_sd)) +
  geom_point() +
  scale_x_log10(
    name = 'DNA yield (ng/uL)',
    breaks = c(0.05, 0.1, 1, 10, 100, 200),
    labels = c('too\nlow', 0.1, 1, 10, 100, 'too\nhigh')
  ) +
  scale_y_log10(name = 'Standard deviation in base content') +
  theme_few()

p1

Now, against insert size

# DNA yield (log axis) against insert size.
# Readings recorded as 'too high' are drawn at 200 and zeros at 0.05 so that
# they fit on the log x axis; those two positions are labelled 'too high' and
# 'too low'.
p2 <- df_info %>%
  mutate(
    dna_c = if_else(dna_concentration == 'too high', '200', dna_concentration),
    dna_c = as.numeric(dna_c),
    dna_c = if_else(dna_c == 0, 0.05, dna_c)
  ) %>%
  ggplot(aes(x = dna_c, y = insert_size)) +
  geom_point() +
  scale_x_log10(
    name = 'DNA yield (ng/uL)',
    breaks = c(0.05, 0.1, 1, 10, 100, 200),
    labels = c('too\nlow', 0.1, 1, 10, 100, 'too\nhigh')
  ) +
  scale_y_continuous(name = 'Insert size (bp)') +
  theme_few()

p2

# An otherwise empty plot that only carries the shared, centred, bold title.
title_plot <- ggplot() +
  labs(title = "Correlation between DNA yield and quality metrics") +
  theme_void() +  # no axes, legend or panel
  theme(
    plot.title = element_text(hjust = 0.5, size = 12, face = "bold", vjust = 1),
    plot.background = element_rect(fill = "white", color = "white")
  )

# Stack the title (5% of the height) above the two panels, which are labelled
# automatically.
p <- cowplot::plot_grid(
  title_plot,
  cowplot::plot_grid(plotlist = list(p1, p2), labels = "AUTO", ncol = 1),
  ncol = 1,
  rel_heights = c(0.05, 0.95)
)

p

# Export PDF and PNG copies of the combined figure.
walk2(
  c('yield_vs_quality.pdf', 'yield_vs_quality.png'),
  c('pdf', 'png'),
  function(out_file, out_device) {
    ggsave(filename = out_file, plot = p, device = out_device,
           path = 'paper_images', width = 5, height = 8.5, units = 'in')
  }
)

Bottom line: as long as the majority of the samples for each species are high-quality, having low-quality samples in the training set should not cause much trouble and might even improve inference for some low-quality samples.

Number of samples per species x DNA quality

Now let’s evaluate the effect of number of samples per species.

# Load the results of the training-set-size experiment, dropping the first
# (row index) column.
df <- read_csv('n_training.csv') %>%
  select(-1)
New names:Rows: 164229 Columns: 10── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (5): bp_training, samples_training, sample_valid, valid_actual, valid_prediction
dbl (5): ...1, kmer_size, replicate, bp_valid, n_samp_training
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Flag each validation call as correct when the predicted species equals the
# actual species, then display the table.
df <- mutate(df, correct_pred = valid_prediction == valid_actual)

df

Does the number of samples used in training impact the validation accuracy? Let’s plot one panel for each sample. It seems it does.

# One panel per validation sample (labelled with its species): boxplots of
# mean accuracy against n_samp_training / 10. Each box spreads over the
# different bp_valid values.
acc_by_size <- df %>%
  group_by(n_samp_training, bp_valid, sample_valid, valid_actual) %>%
  summarize(mean_acc = mean(correct_pred))

p <- ggplot(acc_by_size) +
  #geom_jitter(aes(x = n_samp_training/10, y = mean_acc)) +
  geom_boxplot(
    aes(x = n_samp_training/10, y = mean_acc, group = n_samp_training/10)
  ) +
  facet_wrap(valid_actual~sample_valid) +
  theme_few()
`summarise()` has grouped output by 'n_samp_training', 'bp_valid', 'sample_valid'. You can override using the `.groups` argument.
# Display the per-sample boxplots.
p

Let’s now plot only the average accuracy for each sample across replicates, with each sample represented by a line.

It seems that having more samples in the training set does help, but in most cases about 4 samples is already pretty good. Let’s plot this, coloring by species.

# One line per validation sample: mean accuracy against n_samp_training / 10,
# with colour and line type identifying the species.
# summarize() would otherwise leave the result grouped by n_samp_training and
# sample_valid, and fct_reorder() would then run separately inside each of
# those small groups. The grouping is dropped so that species are ordered by
# their accuracy over the whole table.
df %>%
  group_by(n_samp_training, sample_valid, valid_actual) %>%
  summarize(mean_acc = mean(correct_pred), .groups = 'drop') %>%
  mutate(valid_actual = fct_reorder(valid_actual, mean_acc)) %>%
  ggplot() +
  geom_line(aes(x = n_samp_training/10, y = mean_acc, group = sample_valid, color = valid_actual, linetype = valid_actual)) +
  # Five colours used twice, paired with two line types, give ten distinct
  # colour / line-type combinations.
  scale_color_manual(values = c(few_pal('Dark')(5), few_pal('Dark')(5))) +
  scale_linetype_manual(values = rep(1:2, each = 5)) +
  theme_few()
`summarise()` has grouped output by 'n_samp_training', 'sample_valid'. You can override using the `.groups` argument.

Now let’s try to use line type by sample quality instead.


# Mean accuracy per validation sample and training-set size, joined to a
# high/low DNA-yield flag. A sample counts as high yield when its DNA
# concentration is at or above the median over df_info ('too high' readings
# are entered as 150 before taking the median).
df_plot <- df %>%
  group_by(n_samp_training, sample_valid, valid_actual) %>%
  summarize(mean_acc = mean(correct_pred)) %>%
  mutate(valid_actual = fct_reorder(valid_actual, mean_acc)) %>%
  left_join(
    df_info %>%
      mutate(
        sample_valid = paste0('S-', sample_number),
        dna_concentration = ifelse(dna_concentration == 'too high', 150, dna_concentration),
        dna_concentration = as.numeric(dna_concentration),
        highqual = dna_concentration >= quantile(dna_concentration, probs = 0.5)
      ) %>%
      select(sample_valid, highqual)
  )
`summarise()` has grouped output by 'n_samp_training', 'sample_valid'. You can override using the `.groups` argument.Joining with `by = join_by(sample_valid)`
# Quartiles of per-sample mean accuracy for each training-set size.
# NOTE(review): df_ribbon is not referenced by the plot below, which computes
# the same quartiles through stat_summary(); kept in case other code uses it.
df_ribbon <- df_plot %>%
  group_by(n_samp_training) %>%
  summarise(q1 = quantile(mean_acc, 0.25),
            median = median(mean_acc),
            q3 = quantile(mean_acc, 0.75))


# One thin line per validation sample (line type = DNA yield class) drawn over
# a pink interquartile ribbon, with the median across samples in red.
# Line thickness is set with `linewidth`: using `size` for lines is deprecated
# since ggplot2 3.4.0.
p <- ggplot(df_plot) +
  stat_summary(aes(x = n_samp_training/10, y = mean_acc), fill = 'pink', fun.max = function(x){quantile(x, 0.75)}, fun.min = function(x){quantile(x, 0.25)}, geom = 'ribbon') +
  geom_line(aes(x = n_samp_training/10, y = mean_acc, group = sample_valid, linetype = highqual), alpha = 0.5, linewidth = 0.25) +
  stat_summary(aes(x = n_samp_training/10, y = mean_acc), color = 'red', linewidth = 0.5, fun = 'median', geom = 'line') +
  scale_linetype_manual(values = c('TRUE' = "solid", 'FALSE' = "51"), name = 'DNA yield', labels = c('TRUE' = 'High', 'FALSE' = 'Low')) +
  scale_x_continuous(breaks = 1:7) +
  ylab('Average validation accuracy') +
  xlab('Training samples per species') +
  theme_few(base_size = 6) +
  theme(legend.key.size = unit(0.2, "cm"))

p

The graph is a little cluttered, let’s now do a version for the final figure in the paper:


# Add each sample's content_sd, turn it into a 1-100 quality rank (a lower
# content_sd gives a higher rank), and order species from highest to lowest
# mean accuracy for the facets.
df_facet_plot <- df_plot %>%
  ungroup() %>%
  left_join(
    read_csv('sample_info_stats.csv') %>%
      select(sample_valid = library_id, content_sd)
  ) %>%
  mutate(
    dna_quality = ntile(1 - content_sd, 100),
    valid_actual = fct_reorder(valid_actual, mean_acc, .fun = mean, .desc = TRUE)
  )
New names:Rows: 100 Columns: 11── Column specification ───────────────────────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (7): species, collector, collection, country, dna_concentration, library_id, filename_root
dbl (4): ...1, sample_number, insert_size, content_sd
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.Joining with `by = join_by(sample_valid)`
# Figure for the paper: one facet per species, one line per validation sample
# coloured by DNA quality rank, and a dashed black line for the mean across
# the samples of that species.
# Line thickness is set with `linewidth`: using `size` for lines is deprecated
# since ggplot2 3.4.0.
p <- ggplot(df_facet_plot) +
  #stat_summary(aes(x = n_samp_training/10, y = mean_acc), fill = gray(.8), fun.max = function(x){quantile(x,0.75, type =4)},fun.min = function(x){quantile(x,0.25,type =4)}, geom='ribbon') +
  geom_line(aes(x = n_samp_training/10, y = mean_acc, group = sample_valid, color = dna_quality), alpha = 0.5) +
  stat_summary(aes(x = n_samp_training/10, y = mean_acc), color = 'black', linewidth = 0.5, linetype = 'dashed', fun = 'mean', geom = 'line') +
  scale_color_viridis_c(name = 'DNA quality rank') +
  #scale_linetype_manual(values = c('TRUE' = "solid", 'FALSE' = "51"), name = 'DNA quality', labels = c('TRUE' = 'High', 'FALSE' = 'Low')) +
  scale_x_continuous(breaks = 1:7) +
  ylab('Average validation accuracy') +
  xlab('Training samples per species') +
  facet_wrap(~valid_actual, nrow = 1) +
  theme_few(base_size = 6) +
  theme(legend.key.size = unit(0.2, "cm"))

p

ggsave(filename = 'n_samples.png', plot = p, device = 'png', path = 'paper_images', width = 16, height = 5, units = 'cm', dpi = 2400)

Finally, let’s plot the actual varKodes for Stigmaphyllon, with each species in a row, ordered by quality. We start by generating the appropriate data frame:

# Locate the varKode image of one sample.
#
# Image files are expected to be named '<prefix>+<sample_id>_<suffix>'. The
# sample id is matched literally, so characters that are special in regular
# expressions cannot cause false matches.
#
# sample_id: a single sample identifier, e.g. 'S-91'.
# image_dir: directory to search; defaults to 'images_7'.
#
# Returns the full path of the alphabetically last matching file. With
# zero-padded data amounts in the file name, that is the image built from the
# most data. When no file matches, returns NA_character_ with a warning
# instead of a zero-length vector, so the result is always of length one.
find_image <- function(sample_id, image_dir = 'images_7') {
  # list.files() returns its result sorted alphabetically.
  paths <- list.files(path = image_dir, full.names = TRUE)
  file_names <- basename(paths)

  needle <- paste0('+', sample_id, '_')
  hit <- regexpr(needle, file_names, fixed = TRUE)
  # Require at least one character before the '+' and after the '_'.
  is_match <- hit > 1L & (hit + nchar(needle)) <= nchar(file_names)

  matches <- paths[is_match]
  if (length(matches) == 0) {
    warning("No image found for sample '", sample_id, "' in '", image_dir, "'",
            call. = FALSE)
    return(NA_character_)
  }
  matches[length(matches)]
}


# One row per validation sample, using the results for n_samp_training == 70.
# Each row gets the path of the sample's image and its position within its
# species after sorting by decreasing content_sd. Underscores in species names
# are replaced by '. ' for display.
df_varKode_plot <- df_facet_plot %>%
  filter(n_samp_training == 70) %>%
  select(sample_valid, valid_actual, content_sd, mean_acc) %>%
  distinct() %>%
  mutate(image_path = map_chr(sample_valid, find_image)) %>%
  arrange(desc(content_sd)) %>%
  mutate(quality_rank = row_number(), .by = valid_actual) %>%
  arrange(valid_actual, quality_rank) %>%
  mutate(valid_actual = str_replace_all(valid_actual, "_", ". "))

df_varKode_plot

Now let’s plot


# Grid of varKode images: one row per species, one column per quality rank.
# The tile behind each image is filled by that sample's mean validation
# accuracy.
p <- ggplot(df_varKode_plot, aes(x = quality_rank, y = valid_actual)) +
  geom_tile(aes(fill = mean_acc), color = "white") +
  ggimage::geom_image(aes(image = image_path), size = 0.09) +
  scale_fill_viridis_c(
    name = "Average validation accuracy",
    option = 'magma',
    limits = c(0, 1),
    labels = scales::percent
  ) +
  scale_x_discrete() +
  coord_equal() +
  labs(
    title = expression(paste("varKodes for species of ", italic("Stigmaphyllon"))),
    x = "DNA quality rank",
    y = "Actual species"
  ) +
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "white", color = "white"),
    panel.background = element_rect(fill = "white", color = "white"),
    plot.title = element_text(hjust = 0.5),
    panel.grid = element_blank(),
    axis.text.y = element_text(face = 'italic'),
    legend.position = 'bottom'
  )

print(p)

# Export a high-resolution PNG and a PDF copy.
ggsave(
  filename = 'varkodes_quality.png', plot = p, device = 'png',
  path = 'paper_images', width = 7, height = 7, units = 'in', dpi = 1200
)
ggsave(
  filename = 'varkodes_quality.pdf', plot = p, device = 'pdf',
  path = 'paper_images', width = 7, height = 7, units = 'in'
)

LS0tCnRpdGxlOiAiQ05OIGV2YWx1YXRpb24iCm91dHB1dDoKICBodG1sX25vdGVib29rOgogICAgZGZfcHJpbnQ6IHBhZ2VkCiAgaHRtbF9kb2N1bWVudDoKICAgIGRmX3ByaW50OiBwYWdlZAogICAgY29kZV9mb2xkaW5nOiBzaG93Ci0tLQpgYGB7cn0KI3JtKGxpc3QgPSBscygpKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShmdXR1cmUpCmxpYnJhcnkoZ2d0aGVtZXMpCnNldC5zZWVkKDEyNDUyNjQpCmBgYAoKSW4gdGhpcyBub3RlYm9vayB3ZSB3aWxsIHRlc3QgdGhlIHBlcmZvcm1hbmNlIG9mIHZhcktvZGUgdG8gZGlzdGluZ3Vpc2ggc3BlY2llcyBvZiAqU3RpZ21hcGh5bGxvbiogYW5kIGZpZ3VyZSBvdXQgdGhlIGJlc3QgcGFyYW1ldGVycyBmb3IgdHJhaW5pbmcgYSBkYXRhc2V0LgoKIyBLbWVyIHNpemUgYW5kIGFtb3VudCBvZiBkYXRhCgpUbyBzdGFydCwgd2UgcHJvZHVjZWQgaW1hZ2VzIGZyb20gZGlmZmVyZW50IG51bWJlcnMgb2Yga21lcnMuIFdlIGNhbiBzdXBwb3NlIHRoYXQgc2hvcnRlciBrbWVycyB3aWxsIG9mZmVyIGxvd2VyIHJlc29sdXRpb24gdG8gcmVzb2x2ZSBzcGVjaWVzLCBidXQgdGhleSB3aWxsIGFsc28gY3JlYXRlIHNtYWxsZXIgZmlsZXMgdGhhdCByZXF1aXJlIGxlc3MgY29tcHV0YXRpb24uIEhlcmUgd2Ugd2lsbCB0ZXN0IHdoZXRoZXIgaW1hZ2VzIGJhc2VkIG9uIGxvbmdlciBrbWVycyByZXN1bHQgaW4gaGlnaGVyIGFjY3VyYWN5LiBBcyBhbiBleGFtcGxlLCBoZXJlIGFyZSBpbWFnZXMgcHJvZHVjZWQgZnJvbSAyMDBNYiBmb3IgdGhlIHNhbWUgc2FtcGxlLCBidXQgZGlmZmVyZW50IGttZXIgc2l6ZXMgKDUtOSk6CgpgYGB7cn0Ka25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MocGFzdGUwKCdpbWFnZXNfJyw1OjksJy9TX2Jhbm5pc3RlcmlvaWRlcytTLTkxXzAwMjAwMDAwSy5wbmcnKSkKYGBgCldlIGFsc28gdXNlZCBkaWZmZXJlbnQgYW1vdW50cyBvZiBkYXRhIHRvIHByb2R1Y2UgaW1hZ2VzLCBzaW5jZSB3ZSB3YW50IHRvIGZpZ3VyZSBvdXQgdGhlIGxvd2VzdCBhbW91bnQgbmVlZGVkIHRvIGRpc3Rpbmd1aXNoIHNwZWNpZXMuIFdpdGggbGVzcyBkYXRhLCBmaWd1cmVzIGdldCBtb3JlIG5vaXN5IHNpbmNlIGNoYW5jZSBwbGF5cyBhIGJpZ2dlciByb2xlIGluIHRoZSBvYnNlcnZlZCBrbWVyIGZyZXF1ZW5jaWVzLiBUaGlzIHNob3VsZCBiZSBtb3JlIHNldmVyZSBmb3IgbGFyZ2VyIGttZXIgc2l6ZXMsIHNpbmNlIGVhY2gga21lciB3aWxsIGJlIG1vcmUgdW5pcXVlIGluIHRoZSBnZW5vbWUuCgpGb3IgZXhhbXBsZSwgaW1hZ2VzIGZvciA1LW1lciBmb3IgdGhlIHNhbWUgc2FtcGxlIGFzIGFib3ZlLCBmb3IgNTAwS2IgYW5kIDIwME1iOgpgYGB7cn0Ka25pdHI6OmluY2x1ZGVfZ3JhcGhpY3MocGFzdGUwKCdpbWFnZXNfNi9TX2Jhbm5pc3RlcmlvaWRlcytTLTkxXzAwJyxjKCcwMDA1MDAnLCcyMDAwMDAnKSwnSy5wbmcnKSkKYGBgClRoZSBzYW1lLCBidXQgZm9yIDgtbWVyczoKYGBge3J9CmtuaXRy
OjppbmNsdWRlX2dyYXBoaWNzKHBhc3RlMCgnaW1hZ2VzXzgvU19iYW5uaXN0ZXJpb2lkZXMrUy05MV8wMCcsYygnMDAwNTAwJywnMjAwMDAwJyksJ0sucG5nJykpCmBgYAoKTm93IHRoYXQgd2UgdW5kZXJzdGFuZCB0aGUgZGlmZmVyZW5jZXMgYmV0d2VlbiBpbWFnZXMsIGxldCdzIHVuZGVyc3RhbmQgdGhlIGVmZmVjdCBpbiBhY2N1cmFjeS4gV2UgcHJldmlvdXNseSB0cmFpbmVkIENOTiBtb2RlbHMgdG8gcmVjb2duaXplIGltYWdlcyBmb3IgYSBjb21iaW5hdGlvbiBvZiBrbWVyIHNpemVzIGFuZCBhbW91bnQgb2YgZGF0YSwgd2l0aCAxMCByZXBsaWNhdGVzIGZvciBlYWNoIGNvbWJpbmF0aW9uLiBJbiBlYWNoIHJlcGxpY2F0ZSwgd2Uga2VwdCAzIHJhbmRvbWx5IGNob3NlbiBzYW1wbGVzIHBlciBzcGVjaWVzIGFzIGEgdmFsaWRhdGlvbiBzZXQgYW5kIGNoZWNrZWQgdGhlIGFjY3VyYWN5IG9mIHRoZSB0cmFpbmVkIG1vZGVsIGluIGd1ZXNzaW5nIHRoZSBzcGVjaWVzIG9mIHRoZXNlIHNhbXBsZXMsIGZvciBkaWZmZXJlbnQgYW1vdW50cyBvZiBkYXRhIHVzZWQgZm9yIHRoZSB2YWxpZGF0aW9uIHNhbXBsZS4gV2hhdCB3ZSB3YW50IGlzIHRvIGZpbmQ6CgoxIC0gVGhlIGxvd2VzdCBrbWVyIHNpemUgdG8gcHJvZHVjZSBoaWdoIGFjY3VyYWN5CgoyIC0gVGhlIGxvd2VzdCBhbW91bnQgb2YgZGF0YSBuZWVkZWQKCjMgLSBXaGV0aGVyIHRoZSBhbW91bnQgb2YgZGF0YSB1c2VkIGZvciB0cmFpbmluZyBhbmQgZm9yIHF1ZXJ5aW5nIG11c3QgYmUgc2ltaWxhci4KClRoZSByZXN1bHRzIG9mIHRoZXNlIHNpbXVsYXRpb25zIHdlcmUgc2F2ZWQgYXMgYSBjc3YgdGFibGUsIGxldCdzIGxvYWQgaXQgKGlnbm9yaW5nIHRoZSBmaXJzdCwgaW5kZXggY29sdW1uKToKYGBge3J9CmRmID0gcmVhZF9jc3YoJ2ttZXJTaXplX1ZTX2JwLmNzdicpWy0xXQpkZgpgYGAKCk5vdyBsZXQncyBtYWtlIHN1cmUgYnBfdHJhaW5pbmcgYW5kIGJwX3ZhbGlkIGFyZSB0cmVhdGVkIGFzIG9yZGVyZWQgZmFjdG9ycyBmb3IgbmljZSBwbG90dGluZzoKYGBge3J9Cm5vdF9hbGwgPSBhcy5jaGFyYWN0ZXIoc29ydChhcy5pbnRlZ2VyKHVuaXF1ZShkZiRicF90cmFpbmluZ1shc3RyX2RldGVjdChkZiRicF90cmFpbmluZywnXFx8JywpXSkpLzFlNikpCm9yZGVyZWRfbGV2ZWxzID0gYyhub3RfYWxsLCdhbGwnKQoKZGYgPSBkZiAlPiUKICBtdXRhdGUoYnBfdHJhaW5pbmcgPSBhcy5jaGFyYWN0ZXIoYXMuaW50ZWdlcihicF90cmFpbmluZykvMWU2KSApICU+JQogIG11dGF0ZShicF90cmFpbmluZyA9IHJlcGxhY2VfbmEoYnBfdHJhaW5pbmcsICdhbGwnKSkgJT4lCiAgbXV0YXRlKGJwX3RyYWluaW5nID0gZmFjdG9yKGJwX3RyYWluaW5nLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICBsZXZlbHMgPSBvcmRlcmVkX2xldmVscywgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yZGVyZWQgPSBUUlVFKSwKICAgICAgICAgYnBfdmFsaWQgPSBmYWN0b3IoYXMu
Y2hhcmFjdGVyKGFzLmludGVnZXIoYnBfdmFsaWQpLzFlNiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICBsZXZlbHM9b3JkZXJlZF9sZXZlbHMsIAogICAgICAgICAgICAgICAgICAgICAgICAgICBvcmRlcmVkID0gVFJVRSksCiAgICAgICAgIGttZXJfc2l6ZSA9IGZhY3Rvcihhcy5jaGFyYWN0ZXIoa21lcl9zaXplKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGxldmVscyA9IGFzLmNoYXJhY3Rlcihzb3J0KHVuaXF1ZShrbWVyX3NpemUpKSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBvcmRlcmVkID0gVFJVRQogICAgICAgICAgICAgICAgICAgICAgICAgICAgKQogICAgICAgICApCmRmCmBgYApMZXQncyBzdW1tYXJpemUgdGhlc2UgcmVzdWx0cyBpbiB0YWJsZSBzbyB3ZSBjYW4gcHV0IHNvbWUgbnVtYmVycyBpbiB0aGUgcGFwZXI6CgpgYGB7cn0KZGYgJT4lCiAgZ3JvdXBfYnkoa21lcl9zaXplLGJwX3RyYWluaW5nKSAlPiUKICBzdW1tYXJpemUobWluX3ZhbGlkID0gbWluKHZhbGlkX2FjYyksCiAgICAgICAgICAgIG1lYW5fdmFsaWQgPSBtZWFuKHZhbGlkX2FjYyksCiAgICAgICAgICAgIG1heF92YWxpZCA9IG1heCh2YWxpZF9hY2MpKQpgYGAKCgpOb3cgd2UgY2FuIHBsb3Q6CmBgYHtyIGZpZy5oZWlnaHQ9MTB9CmttZXJfbGFiZWxsZXIgPSBhc19sYWJlbGxlcihmdW5jdGlvbih2YWx1ZSl7CiAgcmV0dXJuKHBhc3RlMCgna21lciBsZW5ndGg6Jyx2YWx1ZSkpCn0pCgpnZ3Bsb3QoZGYpICsKICBnZW9tX2ppdHRlcihhZXMoeCA9IGJwX3RyYWluaW5nLCB5ID0gYnBfdmFsaWQsIGNvbG9yID0gdmFsaWRfYWNjKSkgKwogIHNjYWxlX2NvbG9yX3ZpcmlkaXNfYygnVmFsaWRhdGlvblxuYWNjdXJhY3knLCBvcHRpb24gPSAnaW5mZXJubycsIGxpbWl0cyA9IGMoMCwxKSkgKwogIGZhY2V0X2dyaWQofmttZXJfc2l6ZSwgbGFiZWxsZXIgPSBrbWVyX2xhYmVsbGVyKSArCiAgY29vcmRfZXF1YWwoKSArCiAgeGxhYignRGF0YSBpbiB0cmFpbmluZyBpbWFnZXMgKE1iKScpICsKICB5bGFiKCdEYXRhIGluIHZhbGlkYXRpb24gaW1hZ2VzIChNYiknKQogIApgYGAKCk5vdyBhIHZlcnNpb24gd2l0aCBhdmVyYWdlZCBhY2N1cmFjeSAgICAKYGBge3IgZmlnLmhlaWdodD0xMH0KcCA9IGRmICU+JQogIGdyb3VwX2J5KGttZXJfc2l6ZSxicF90cmFpbmluZyxicF92YWxpZCkgJT4lCiAgc3VtbWFyaXplKHZhbGlkX2FjYyA9IG1lYW4odmFsaWRfYWNjKSkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gYnBfdHJhaW5pbmcsIHkgPSBicF92YWxpZCwgZmlsbCA9IHZhbGlkX2FjYykpICsKICBnZW9tX3Jhc3RlcigpICsKICAjZ2VvbV90ZXh0KGFlcyhsYWJlbD1zcHJpbnRmKDEwMCp2YWxpZF9hY2MsZm10PSclMi4wZicpKSxzaXplPTQuNSo1LzE0KSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2MoJ0F2ZXJhZ2VcbnZhbGlkYXRpb25cbmFjY3VyYWN5Jywgb3B0aW9uID0gJ21hZ21hJywgbGltaXRzID0gYygwLDEpLGxhYmVscz1zY2FsZXM6
OnBlcmNlbnQpICsKICBmYWNldF9ncmlkKH5rbWVyX3NpemUsIGxhYmVsbGVyID0ga21lcl9sYWJlbGxlcikgKwogIGNvb3JkX2VxdWFsKCkgKwogIHhsYWIoJ0RhdGEgaW4gdHJhaW5pbmcgaW1hZ2VzIChNYiknKSArCiAgeWxhYignRGF0YSBpbiB2YWxpZGF0aW9uIGltYWdlcyAoTWIpJykgKwogIHRoZW1lX2ZldyhiYXNlX3NpemUgPSA2KQoKcAoKZGlyLmNyZWF0ZSgncGFwZXJfaW1hZ2VzJykKZ2dzYXZlKGZpbGVuYW1lID0gJ2ttZXJsZW5fdnNfYWNjdXJhY3kucG5nJyxwbG90ID1wLGRldmljZT0ncG5nJyxwYXRoID0gJ3BhcGVyX2ltYWdlcycsd2lkdGggPSAyMixoZWlnaHQgPSA1LHVuaXRzID0gJ2NtJyxkcGkgPSAyNDAwKQoKYGBgCgpgYGB7cn0KbWVhbnMgPSBkZiAlPiUKICBmaWx0ZXIoYnBfdHJhaW5pbmcgJWluJSBjKCcwLjUnLCcxJywnMjAwJywnYWxsJykpICU+JQogICNmaWx0ZXIoYnBfdmFsaWQgJWluJSBjKCc1MCcsJzEwMCcsJzIwMCcsJ2FsbCcpKSAlPiUKICBmaWx0ZXIoYnBfdmFsaWQgJWluJSBjKCcyJywnNScsJzEwJywnMjAnLCc1MCcsJzEwMCcsJzIwMCcpKSAlPiUKICBncm91cF9ieShicF90cmFpbmluZyxrbWVyX3NpemUpICU+JQogIHN1bW1hcmlzZShJbnQ9bWVkaWFuKHZhbGlkX2FjYykpCgpkZiAlPiUKICBmaWx0ZXIoYnBfdHJhaW5pbmcgJWluJSBjKCcwLjUnLCcxJywnMjAwJywnYWxsJykpICU+JQogIGZpbHRlcihicF92YWxpZCAlaW4lIGMoJzInLCc1JywnMTAnLCcyMCcsJzUwJywnMTAwJywnMjAwJykpICU+JQogICNmaWx0ZXIoYnBfdmFsaWQgJWluJSBjKCc1MCcsJzEwMCcsJzIwMCcsJ2FsbCcpKSAlPiUKICBnZ3Bsb3QoYWVzKHg9dmFsaWRfYWNjKSkgKwogIGdlb21faGlzdG9ncmFtKGFlcyh4PXZhbGlkX2FjYykpICsKICBmYWNldF9ncmlkKGttZXJfc2l6ZX5icF90cmFpbmluZykgKwogIGdlb21fdmxpbmUoZGF0YSA9IG1lYW5zLCBhZXMoeGludGVyY2VwdCA9IEludCkpCmBgYAoKU28gaXQgc2VlbXMgdGhhdCB0aGUgc21hbGxlc3Qga21lciBzaXplcyBuZXZlciByZXN1bHQgaW4gdmVyeSBoaWdoIGFjY3VyYWN5LCBhbmQgdGhlIGxhcmdlc3Qga21lciBzaXplcyByZXN1bHQgaW4gaGlnaCBhY2N1cmFjeSBmb3IgaGlnaGVyIGFtb3VudHMgb2YgZGF0YSwgYnV0IGxvd2VyIGFjY3VyYWN5IGZvciBsb3dlciBhbW91bnRzLiBJdCBzZWVtcyB0aGF0IGEga21lciBzaXplIG9mIDcgaXMgYSBnb29kIGJhbGFuY2UsIGFuZCB0aGF0IHRyYWluaW5nIHVzaW5nIGltYWdlcyBvZiBkaWZmZXJlbnQgc2l6ZXMgaGVscHMgaW4gYmVpbmcgbW9yZSByb2J1c3QgdG8gdGhlIGFtb3VudCBvZiBkYXRhIHVzZWQgdG8gcHJvZHVjZSB2YWxpZGF0aW9uIGltYWdlcy4KCkFzIGxpdHRsZSBhcyAxTWIgcHJvZHVjZXMgbW9kZXJhdGVseSBhY2N1cmF0ZSByZXN1bHRzIGZvciBrbWVyIHNpemUgNyBvciBiZWxvdy4KCgpDYW4gd2UgcXVhbnRpZnkgd2hhdCBpcyBkaWZmZXJlbnQgYWJvdXQgaW1hZ2VzIHByb2R1Y2VkIHdp
dGggZGlmZmVyZW50IGRhdGEgYW1vdW50cz8gSXQgc2VlbXMgdGhlcmUgaXMgbGFyZ2VyIHZhcmlhdGlvbiBpbiBwaXhlbCBpbnRlbnNpdGllcywgcHJvYmFibHkgYmVjYXVzZSBvZiByYW5kb20gZmx1Y3R1YXRpb25zOgoKCmBgYHtyfQppbWFnZXMgPSBjKGxpc3QuZmlsZXMocGF0aD0naW1hZ2VzXzUnLHBhdHRlcm49Jy5wbmcnLCByZWN1cnNpdmUgPSBULCBmdWxsLm5hbWVzID0gVCksCiAgICAgICAgICAgbGlzdC5maWxlcyhwYXRoPSdpbWFnZXNfNicscGF0dGVybj0nLnBuZycsIHJlY3Vyc2l2ZSA9IFQsIGZ1bGwubmFtZXMgPSBUKSwKICAgICAgICAgICBsaXN0LmZpbGVzKHBhdGg9J2ltYWdlc183JyxwYXR0ZXJuPScucG5nJywgcmVjdXJzaXZlID0gVCwgZnVsbC5uYW1lcyA9IFQpLAogICAgICAgICAgIGxpc3QuZmlsZXMocGF0aD0naW1hZ2VzXzgnLHBhdHRlcm49Jy5wbmcnLCByZWN1cnNpdmUgPSBULCBmdWxsLm5hbWVzID0gVCksCiAgICAgICAgICAgbGlzdC5maWxlcyhwYXRoPSdpbWFnZXNfOScscGF0dGVybj0nLnBuZycsIHJlY3Vyc2l2ZSA9IFQsIGZ1bGwubmFtZXMgPSBUKSkKCm5rbWVycyA9IGZ1bmN0aW9uKGspeyAjZnJvbSBodHRwczovL2Jpb2luZm9sb2dpY3MuZ2l0aHViLmlvL3Bvc3QvMjAxOC8wOS8xNy9rLW1lci1jb3VudGluZy1wYXJ0LWktaW50cm9kdWN0aW9uLwogICg0XmsgKyAoMSAtIGslJTIpICogNF4oay8yKSkvMgp9CgpnZXRfc2QgPSBmdW5jdGlvbihwYXRoKXsKICBrID0gYXMuaW50ZWdlcihnc3ViKCcuK18oWzAtOV0pLy4rJywnXFwxJywgcGF0aCkpCiAgdGF4b24gPSBnc3ViKCcuKy8oLispXFwrLisnLCdcXDEnLCBwYXRoKQogIHNhbXBsZSA9IGdzdWIoJy4rXFwrKFMtWzAtOV0rKV8uKycsJ1xcMScsIHBhdGgpCiAgTWJwID0gYXMuaW50ZWdlcihnc3ViKCcuK18oWzAtOV17OH0pSy4rJywnXFwxJywgcGF0aCkpIC8gMTAwMAogIAogIHggPSBzb3J0KHBuZzo6cmVhZFBORyhwYXRoKSkKICB4ID0geFsobGVuZ3RoKHgpLW5rbWVycyhrKSsxKTpsZW5ndGgoeCldCiAgc2RfY291bnRzID0gc2QodGFibGUoeCkpCiAgCiAgZGF0YS5mcmFtZShrID0gaywgdGF4b24gPSB0YXhvbiwgc2FtcGxlID0gc2FtcGxlLCBNYnAgPSBNYnAsIHNkX2NvdW50cz1zZF9jb3VudHMpCiAgCn0KCnBsYW4obXVsdGlzZXNzaW9uKHdvcmtlcnMgPSA0KSkKZGYgPSBmdXJycjo6ZnV0dXJlX21hcF9kZnIoaW1hZ2VzLGdldF9zZCkKcGxhbihzZXF1ZW50aWFsKQoKCmRmCmBgYApgYGB7cn0KZ2dwbG90KGRmKSArCiAgZ2VvbV9saW5lKGFlcyh4PU1icCwgeT1zZF9jb3VudHMsY29sb3I9c2FtcGxlKSkgKwogIGZhY2V0X3dyYXAoYXMuZmFjdG9yKGspfi4sc2NhbGVzID0gJ2ZyZWUnKSArCiAgc2NhbGVfY29sb3JfZGlzY3JldGUoZ3VpZGU9J25vbmUnKSArCiAgc2NhbGVfeV9sb2cxMCgpICsKICBzY2FsZV94X2xvZzEwKCkKYGBgCgoKIyBUcmFpbmluZyBwYXJhbWV0ZXJzCgpOb3cgd2Ugd2lsbCBjaGVjayB0aGUgcmVzdWx0cyBv
ZiB1c2luZyBkaWZmZXJlbnQgdHJhaW5pbmcgcGFyYW1ldGVyczoKLSBtb2RlbCBwcmV0cmFpbmluZwotIGF1Z21lbnRhdGlvbiAoQ3V0TWl4IG9yIE1peFVwKQotIExhYmVsIFNtb290aGluZwotIG1vZGVsIGFyY2hpdGVjdHVyZQotIGxpZ2h0aW5nIHRyYW5zZm9ybWF0aW9ucwoKTGV0J3MgcmVhZCB0aGUgZGF0YSBhbmQgcHJlcGFyZSBmb3IgcGxvdHRpbmc6CgpgYGB7cn0KZGYgPSByZWFkX2NzdigndHJhaW5pbmdfcGFyYW1zLmNzdicpWy0xXSAlPiUKICBtdXRhdGUoYnBfdmFsaWQgPSBmYWN0b3IoYXMuY2hhcmFjdGVyKGFzLmludGVnZXIoYnBfdmFsaWQpLzFlNiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICBsZXZlbHMgPSBzb3J0KHVuaXF1ZShicF92YWxpZC8xZTYpKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgIG9yZGVyZWQgPSBUUlVFKSwKICAgICAgICAgYXVnbWVudGF0aW9uID0gaWZlbHNlKHN0cl9kZXRlY3QoY2FsbGJhY2ssJ0N1dE1peCcpLCdDdXRNaXgnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgaWZlbHNlKHN0cl9kZXRlY3QoY2FsbGJhY2ssJ01peFVwJyksJ01peFVwJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnTm9uZScpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApLAogICAgICAgICBhdWdtZW50YXRpb24gPSBmYWN0b3IoYXVnbWVudGF0aW9uLCBsZXZlbHMgPSBjKCdOb25lJywnTWl4VXAnLCdDdXRNaXgnKSxvcmRlcmVkID0gRiksCiAgICAgICAgIGF1ZyA9IHN0cl9yZXBsYWNlKGF1Z21lbnRhdGlvbiwnTm9uZScsJycpLAogICAgICAgICBsYWJsc210aD0gaWZlbHNlKGxhYmVsX3Ntb290aGluZywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICdsYWJlbCBTbW9vdGhpbmcnLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgJycpLAogICAgICAgICBwcmV0ciA9IGlmZWxzZShwcmV0cmFpbmVkLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICdwcmV0cmFpbmVkJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnJwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICksCiAgICAgICAgIHRyYW5zZm9ybWF0aW9ucyA9IGlmZWxzZSh0cmFucywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnd2l0aF90cmFuc2Zvcm1zJywKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAnJwogICAgICAgICAgICAgICAgICAgICAgICAgICAgICksCiAgICAgICAgIHBhcmFtZXRlcnMgPSBwYXN0ZShhcmNoLHByZXRyLGxhYmxzbXRoLGF1Zyx0cmFuc2Zvcm1hdGlvbnMsc2VwPScsJykgJT4lCiAgICAgICAgICAgc3RyX3JlcGxhY2VfYWxsKCcsezIsfScsJywnKSAlPiUKICAgICAgICAgICBzdHJfcmVtb3ZlX2FsbCgnXix8LCQnKSAlPiUKICAgICAgICAgICBzdHJfcmVwbGFjZV9hbGwoJ14kJywnTm9uZScpICU+JQogICAgICAgICAgIGZjdF9yZW9yZGVyKHZhbGlkX2FjYywgbWVhbikKICApCgpkZiAKCgoK
YGBgCk5vdyB3ZSBjYW4gcGxvdCB0aGUgZWZmZWN0IG9mIHBhcmFtZXRlcnMuIFRoZXJlIGFyZSBjbGVhcmx5IHNvbWUgbW9kZWxzIHRoYXQgZG8gbXVjaCBiZXR0ZXIgdGhhbiBvdGhlcnM6CmBgYHtyIGZpZy5oZWlnaHQ9IDV9CmdncGxvdChkZiwgYWVzKHggPSBwYXJhbWV0ZXJzLCB5ID0gdmFsaWRfYWNjKSkgKwogICNnZW9tX2JveHBsb3QoKSArCiAgI2dlb21fdmlvbGluKGFkanVzdD0xLjUpICsKICBnZW9tX2ppdHRlcihhZXMoY29sb3IgPSBicF92YWxpZCksaGVpZ2h0ID0gMC4wMDUpICsKICBzY2FsZV9jb2xvcl92aXJpZGlzX2Qob3B0aW9uPSd0dXJibycsYmVnaW4gPSAwLjEsIGVuZD0wLjkpICsKICAjZmFjZXRfd3JhcCh+YnBfdmFsaWQpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDEsIGFuZ2xlID0gNDUpKQpgYGAKTGV0J3MgbG9vayBhdCB0aGUgdG9wIDIwIG1vZGVsczoKYGBge3IgZmlnLmhlaWdodCA9IDV9CmdncGxvdChmaWx0ZXIoZGYsIHBhcmFtZXRlcnMgJWluJSB0YWlsKGxldmVscyhkZiRwYXJhbWV0ZXJzKSwyMCkpLCBhZXMoeCA9IHBhcmFtZXRlcnMsIHkgPSB2YWxpZF9hY2MpKSArCiAgI2dlb21fYm94cGxvdCgpICsKICAjZ2VvbV92aW9saW4oYWRqdXN0PTEuNSkgKwogIGdlb21faml0dGVyKGFlcyhjb2xvciA9IGJwX3ZhbGlkKSxoZWlnaHQgPSAwLjAwNSkgKwogIHNjYWxlX2NvbG9yX3ZpcmlkaXNfZChvcHRpb249J3R1cmJvJyxiZWdpbiA9IDAuMSwgZW5kPTAuOSkgKwogICNmYWNldF93cmFwKH5icF92YWxpZCkgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMSwgYW5nbGUgPSA0NSkpCmBgYAoKTGV0J3MgcGxvdCBieSBhcmNoaXRlY3R1cmU6CmBgYHtyfQpwID0gZ2dwbG90KG11dGF0ZShkZiwgYXJjaCA9IGZjdF9yZW9yZGVyKGFyY2gsdmFsaWRfYWNjKSksIAogICAgICAgYWVzKHggPSBhcmNoLCB5ID0gdmFsaWRfYWNjLCBjb2xvcj1icF92YWxpZCkpICsKICAjZ2VvbV9ib3hwbG90KCkgKwogICNnZW9tX3Zpb2xpbihhZGp1c3Q9MS41KSArCiAgZ2VvbV9qaXR0ZXIoYWVzKGNvbG9yID0gYnBfdmFsaWQpLGhlaWdodCA9IDAuMDA1LCBzaXplID0gMC4xLCBhbHBoYSA9IDAuMSwgc2hhcGUgPSAxNikgKwogIHN0YXRfc3VtbWFyeShmdW4gPSBtZWFuLCBnZW9tID0gJ2Nyb3NzYmFyJywgc2l6ZSA9IDAuMDUsIHNob3cubGVnZW5kPUZBTFNFKSArCiAgc2NhbGVfY29sb3JfdmlyaWRpc19kKG9wdGlvbj0ndHVyYm8nLGJlZ2luID0gMC4xLCBlbmQ9MC45LCBuYW1lID0gJ01icCBpbiB2YWxpZGF0aW9uXG5pbWFnZXMnLCAKICAgICAgICAgICAgICAgICAgICAgICAgZ3VpZGUgPSBndWlkZV9sZWdlbmQob3ZlcnJpZGUuYWVzID0gbGlzdChhbHBoYSA9IDEpKSkgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQsIG5hbWUgPSAnVmFsaWRhdGlvbiBBY2N1cmFjeScpICsKICB4bGFiKCdNb2RlbCBhcmNoaXRl
Y3R1cmUnKSArCiAgI2ZhY2V0X3dyYXAofmJwX3ZhbGlkKSArCiAgdGhlbWVfZmV3KGJhc2Vfc2l6ZSA9IDYpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDEsIGFuZ2xlID0gNDUpLAogICAgICAgIGxlZ2VuZC5rZXkuc2l6ZSA9IHVuaXQoMC4yLCAiY20iKSkKCnAgCgpnZ3NhdmUoZmlsZW5hbWUgPSAnYXJjaGl0ZWN0dXJlLnBuZycscGxvdCA9cCxkZXZpY2U9J3BuZycscGF0aCA9ICdwYXBlcl9pbWFnZXMnLHdpZHRoID0gNSxoZWlnaHQgPSA1LHVuaXRzID0gJ2NtJyxkcGkgPSAyNDAwKQpgYGAKCk5vdyBieSBwcmV0cmFpbmVkOgpgYGB7cn0KcCA9IGdncGxvdChtdXRhdGUoZGYsIHByZXRyID0gZmN0X3Jlb3JkZXIocHJldHIsdmFsaWRfYWNjKSksIAogICAgICAgYWVzKHggPSBwcmV0ciwgeSA9IHZhbGlkX2FjYywgY29sb3I9YnBfdmFsaWQpKSArCiAgI2dlb21fYm94cGxvdCgpICsKICAjZ2VvbV92aW9saW4oKSArCiAgZ2VvbV9qaXR0ZXIoYWVzKGNvbG9yID0gYnBfdmFsaWQpLGhlaWdodCA9IDAuMDA1LCBzaXplID0gMC4wNSwgYWxwaGEgPSAwLjEsIHNoYXBlID0gMTYpICsKICBzdGF0X3N1bW1hcnkoZnVuID0gbWVhbiwgZ2VvbSA9ICdjcm9zc2JhcicsIHNpemUgPSAwLjA1KSArCiAgc2NhbGVfeF9kaXNjcmV0ZShsYWJlbHMgPSBjKCdwcmUtdHJhaW5lZCcsJ3JhbmRvbScpLCBuYW1lID0gJ01vZGVsIHByZXRyYWluaW5nJykgKwogIHNjYWxlX2NvbG9yX3ZpcmlkaXNfZChvcHRpb249J3R1cmJvJyxiZWdpbiA9IDAuMSwgZW5kPTAuOSwgbmFtZSA9ICdNYnAgaW4gdmFsaWRhdGlvblxuaW1hZ2VzJykgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQsIG5hbWUgPSAnVmFsaWRhdGlvbiBBY2N1cmFjeScpICsKICAjZmFjZXRfd3JhcCh+YnBfdmFsaWQpICsKICB0aGVtZV9mZXcoYmFzZV9zaXplID0gNikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMSwgYW5nbGUgPSA0NSksCiAgICAgICAgbGVnZW5kLnBvc2l0aW9uID0gJ25vbmUnCiAgICAgICAgKQoKcAoKZ2dzYXZlKGZpbGVuYW1lID0gJ3ByZXRyYWluaW5nLnBuZycscGxvdCA9cCxkZXZpY2U9J3BuZycscGF0aCA9ICdwYXBlcl9pbWFnZXMnLHdpZHRoID0gMyxoZWlnaHQgPSA1LHVuaXRzID0gJ2NtJyxkcGkgPSAyNDAwKQpgYGAKCgpOb3cgYnkgbGFiZWwgc21vb3RoaW5nOgpgYGB7cn0KcCA9IGdncGxvdChtdXRhdGUoZGYsIGxhYmxzbXRoID0gZmN0X3Jlb3JkZXIobGFibHNtdGgsdmFsaWRfYWNjKSksIAogICAgICAgICBhZXMoeCA9IGxhYmxzbXRoLCB5ID0gdmFsaWRfYWNjLCBjb2xvcj1icF92YWxpZCkpICsKICAjZ2VvbV9ib3hwbG90KCkgKwogICNnZW9tX3Zpb2xpbihhZGp1c3Q9MS41KSArCiAgZ2VvbV9qaXR0ZXIoYWVzKGNvbG9yID0gYnBfdmFsaWQpLGhlaWdodCA9IDAuMDA1LCBzaXplID0gMC4wNSwgYWxwaGEgPSAwLjEsIHNoYXBlID0gMTYpICsKICBz
dGF0X3N1bW1hcnkoZnVuID0gbWVhbiwgZ2VvbSA9ICdjcm9zc2JhcicsIHNpemUgPSAwLjA1KSArCiAgc2NhbGVfeF9kaXNjcmV0ZShsYWJlbHMgPSBjKCdObycsJ1llcycpLCBuYW1lID0gJ0xhYmVsIHNtb290aGluZycpICsKICBzY2FsZV9jb2xvcl92aXJpZGlzX2Qob3B0aW9uPSd0dXJibycsYmVnaW4gPSAwLjEsIGVuZD0wLjksIG5hbWUgPSAnTWJwIGluIHZhbGlkYXRpb25cbmltYWdlcycpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gc2NhbGVzOjpwZXJjZW50LCBuYW1lID0gJ1ZhbGlkYXRpb24gQWNjdXJhY3knKSArCiAgI2ZhY2V0X3dyYXAofmJwX3ZhbGlkKSArCiAgdGhlbWVfZmV3KGJhc2Vfc2l6ZSA9IDYpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDEsIGFuZ2xlID0gNDUpLAogICAgICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICdub25lJwogICAgICAgICkKCnAKZ2dzYXZlKGZpbGVuYW1lID0gJ2xhYmVsc21vb3RoaW5nLnBuZycscGxvdCA9cCxkZXZpY2U9J3BuZycscGF0aCA9ICdwYXBlcl9pbWFnZXMnLHdpZHRoID0gMyxoZWlnaHQgPSA1LHVuaXRzID0gJ2NtJyxkcGkgPSAyNDAwKQpgYGAKTm93IGJ5IEN1dE1peC9NaXhVcCBhdWdtZW50YXRpb25zOgpgYGB7cn0KcCA9IGdncGxvdChtdXRhdGUoZGYsIGF1Z21lbnRhdGlvbiA9IGZjdF9yZW9yZGVyKGF1Z21lbnRhdGlvbix2YWxpZF9hY2MpKSwgCiAgICAgICBhZXMoeCA9IGF1Z21lbnRhdGlvbiwgeSA9IHZhbGlkX2FjYywgY29sb3IgPSBicF92YWxpZCkpICsKICAjZ2VvbV9ib3hwbG90KCkgKwogICNnZW9tX3Zpb2xpbihhZGp1c3Q9MS41KSArCiAgZ2VvbV9qaXR0ZXIoYWVzKGNvbG9yID0gYnBfdmFsaWQpLGhlaWdodCA9IDAuMDA1LCBzaXplID0gMC4wNSwgYWxwaGEgPSAwLjEsIHNoYXBlID0gMTYpICsKICBzdGF0X3N1bW1hcnkoZnVuID0gbWVhbiwgZ2VvbSA9ICdjcm9zc2JhcicsIHNpemUgPSAwLjA1KSArCiAgc2NhbGVfeF9kaXNjcmV0ZShuYW1lID0gJ0F1Z21lbnRhdGlvbicpICsKICBzY2FsZV9jb2xvcl92aXJpZGlzX2Qob3B0aW9uPSd0dXJibycsYmVnaW4gPSAwLjEsIGVuZD0wLjksIG5hbWUgPSAnTWJwIGluIHZhbGlkYXRpb25cbmltYWdlcycpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobGFiZWxzID0gc2NhbGVzOjpwZXJjZW50LCBuYW1lID0gJ1ZhbGlkYXRpb24gQWNjdXJhY3knKSArCiAgdGhlbWVfZmV3KGJhc2Vfc2l6ZSA9IDYpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDEsIGFuZ2xlID0gNDUpLAogICAgICAgIGxlZ2VuZC5wb3NpdGlvbiA9ICdub25lJwogICAgICAgICkKCnAKZ2dzYXZlKGZpbGVuYW1lID0gJ2F1Z21lbnRhdGlvbnMucG5nJyxwbG90ID1wLGRldmljZT0ncG5nJyxwYXRoID0gJ3BhcGVyX2ltYWdlcycsd2lkdGggPSAzLGhlaWdodCA9IDUsdW5pdHMgPSAnY20nLGRwaSA9IDI0MDApCmBgYAoKRmluYWxseSxieSBsaWdodGluZyB0cmFuc2Zv
cm1zOgpgYGB7cn0KcCA9IGdncGxvdChtdXRhdGUoZGYsIHRyYW5zZm9ybWF0aW9ucyA9IGZjdF9yZW9yZGVyKHRyYW5zZm9ybWF0aW9ucyx2YWxpZF9hY2MpKSwgCiAgICAgICBhZXMoeCA9IHRyYW5zZm9ybWF0aW9ucywgeSA9IHZhbGlkX2FjYywgY29sb3I9YnBfdmFsaWQpKSArCiAgI2dlb21fYm94cGxvdCgpICsKICAjZ2VvbV92aW9saW4oYWRqdXN0PTEuNSkgKwogIGdlb21faml0dGVyKGFlcyhjb2xvciA9IGJwX3ZhbGlkKSxoZWlnaHQgPSAwLjAwNSwgc2l6ZSA9IDAuMDUsIGFscGhhID0gMC4xLCBzaGFwZSA9IDE2KSArCiAgc3RhdF9zdW1tYXJ5KGZ1biA9IG1lYW4sIGdlb20gPSAnY3Jvc3NiYXInLCBzaXplID0gMC4wNSkgKwogIHNjYWxlX3hfZGlzY3JldGUobmFtZSA9ICdMaWdodGluZyB0cmFuc2Zvcm1zJywgbGFiZWxzID0gYygnTm8nLCdZZXMnKSkgKwogIHNjYWxlX2NvbG9yX3ZpcmlkaXNfZChvcHRpb249J3R1cmJvJyxiZWdpbiA9IDAuMSwgZW5kPTAuOSwgbmFtZSA9ICdNYnAgaW4gdmFsaWRhdGlvblxuaW1hZ2VzJykgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBzY2FsZXM6OnBlcmNlbnQsIG5hbWUgPSAnVmFsaWRhdGlvbiBBY2N1cmFjeScpICsKICB0aGVtZV9mZXcoYmFzZV9zaXplID0gNikgKwogIHRoZW1lKGF4aXMudGV4dC54ID0gZWxlbWVudF90ZXh0KGhqdXN0ID0gMSwgYW5nbGUgPSA0NSksCiAgICAgICAgbGVnZW5kLnBvc2l0aW9uID0gJ25vbmUnCiAgICAgICAgKQoKcApnZ3NhdmUoZmlsZW5hbWUgPSAnbGlnaHRpbmcucG5nJyxwbG90ID1wLGRldmljZT0ncG5nJyxwYXRoID0gJ3BhcGVyX2ltYWdlcycsd2lkdGggPSAzLGhlaWdodCA9IDUsdW5pdHMgPSAnY20nLGRwaSA9IDI0MDApCgpgYGAKCgpMZXQncyB0cnkgYSBsaW5lYXIgbW9kZWwgdG8gY2hlY2sgd2hpY2ggY29tYmluYXRpb24gaXMgYmVzdDoKCmBgYHtyfQpmdWxsX21vZGVsID0gbG0oYXNpbih2YWxpZF9hY2MpfmFyY2gqdHJhbnMqcHJldHJhaW5lZCphdWdtZW50YXRpb24qbGFiZWxfc21vb3RoaW5nKmJwX3ZhbGlkLCBkYXRhID0gZGYpCnBsb3QoZnVsbF9tb2RlbCkKYGBgCmBgYHtyLCByZXN1bHRzPUZBTFNFLCBtZXNzYWdlPUZBTFNFfQpyZWR1Y2VkX21vZGVsID0gc3RlcChsbShhc2luKHZhbGlkX2FjYyl+MSwgZGF0YSA9IGRmKSwgCiAgICAgICAgICAgICAgICAgICAgIHNjb3BlID0gbGlzdChsb3dlciA9IGZvcm11bGEoYXNpbih2YWxpZF9hY2MpfjEpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHVwcGVyID0gZm9ybXVsYShhc2luKHZhbGlkX2FjYyl+YXJjaCp0cmFucypwcmV0cmFpbmVkKmF1Z21lbnRhdGlvbipsYWJlbF9zbW9vdGhpbmcqYnBfdmFsaWQpCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICApLAogICAgICAgICAgICAgICAgICAgICBkaXJlY3Rpb24gPSAnZm9yd2FyZCcpCmBgYAoKVGhlIGJlc3QgbW9kZWwgaXMgcXVpdGUgY29tcGxleCB3aXRoIHNvbWUgaW50ZXJhY3Rp
b25zCgpgYGB7cn0KcmVkdWNlZF9tb2RlbApgYGAKCkxldCdzIG5vdyBsb29rIGF0IG1vZGVsIHByZWRpY3Rpb25zIHRvIGdldCBhIGJldHRlciBzZW5zZS4gV2UgY2FuIHNlZSBhIGZldyB0aGluZ3M6CgoqIFRoZSBiZXN0IG1vZGVsIHZhcmllcyB3aXRoIHRoZSBudW1iZXIgb2YgYnAgdXNlZCB0byBwcm9kdWNlIHZhbGlkYXRpb24gaW1hZ2VzCiogQ3V0TWl4IGFuZCBsaWdodGluZyB0cmFuc2Zvcm1zIGhlbHAKKiBQcmV0cmFpbmluZyBkb2VzIE5PVCBoZWxwCiogaWdfcmVzbmV4dDEwMV8zMng4ZCB3aXRoIEN1dE1peCBhbmQgbGlnaHRpbmcgdHJhbnNmb3JtcyBpcyB0aGUgYmVzdCBmb3IgbGFyZ2VyIGFtb3VudHMgb2YgZGF0YS4gRm9yIDAuNS0xTWJwLCBpdCBpcyBub3QgdGhlIGJlc3QgYnV0IGl0IGlzIGNsb3NlIHRvIHRoZSB0b3AuCiAgKiBUaGlzIGlzIGEgUmVzTmVYVCBhcmNoaXRlY3R1cmUgd2l0aCBkZXB0aCAxMDEsIGNhcmRpbmFsaXR5IDMyIGFuZCBib3R0bGVuZWNrIHNpemUgOC4gYGlnX2Agc3RhbmRzIGZvciBwcmUtdHJhaW5lZCBvbiBpbnN0YWdyYW0sIGJ1dCB3ZSBzZWUgdGhhdCBwcmV0cmFpbmluZyBkaWQgbm90IGhlbHAuIEZvciBtb3JlIGluZm9ybWF0aW9uIGFib3V0IHRoZSBhcmNoaXRlY3R1cmUsIHNlZTogaHR0cHM6Ly9hcnhpdi5vcmcvcGRmLzE2MTEuMDU0MzEucGRmCmBgYHtyfQpwcmVkaWN0aW9ucyA9IHNlbGVjdChkZix0cmFucyxhcmNoLHByZXRyYWluZWQsbGFiZWxfc21vb3RoaW5nLGF1Z21lbnRhdGlvbixicF92YWxpZCkgJT4lCiAgZGlzdGluY3QoKQoKcHJlZGljdGlvbnMkcHJlZGljdGVkX2FjYyA9IHNpbihwcmVkaWN0KHJlZHVjZWRfbW9kZWwsIHByZWRpY3Rpb25zKSkKCnByZWRpY3Rpb25zID0gcHJlZGljdGlvbnMgJT4lCiAgYXJyYW5nZSgtcHJlZGljdGVkX2FjYykKCnByZWRpY3Rpb25zICU+JQogIHNwbGl0KC4kYnBfdmFsaWQpCmBgYAoKIyBFZmZlY3Qgb2Ygc2FtcGxlIHF1YWxpdHkKCk5vdyB0aGF0IHdlIG9wdGltaXplZCB0cmFpbmluZyBwYXJhbWV0ZXJzLCBsZXQncyBldmFsdWF0ZSB0aGUgZWZmZWN0IG9mIHNhbXBsZSBxdWFsaXR5LiBUbyBkbyB0aGF0LCB3ZSBkaWQgdHJhaW5pbmcgdXNpbmcgb25seSA1IHJhbmRvbWx5IGNob3NlbiBzYW1wbGVzIGFzIHRyYWluaW5nIHNldCwgaW5jbHVkaW5nIDAtMyBvZiB0aGUgZm91ciBsb3dlc3QtcXVhbGl0eSBzYW1wbGVzIHBlciBzcGVjaWVzLiBRdWFsaXR5IHdhcyBldmFsdWF0ZWQgdXNpbmcgdHdvIG1ldHJpY3M6IGluc2VydCBzaXplIG9yIGluY3JlYXNlIGluIFQgY29udGVudCB0aHJvdWdob3V0IHJlYWQgbGVuZ3RoLiBXZSB0aGVuIGV2YWx1YXRlZCwgZm9yIGVhY2ggb2YgdGhlIDUgc2FtcGxlcyBwZXIgc3BlY2llcyBsZWZ0IG91dCBvZiB0aGUgdHJhaW5pbmcgc2V0LCB3aGV0aGVyIGl0cyBwcmVkaWN0aW9uIHdhcyBjb3JyZWN0LgoKV2UgZGlkIDUwIHJlcGxpY2F0ZXMgcmFtZG9ubHkgY2hvb3NpbmcgdGhlIHRyYWluaW5nIHNl
dCBmb3IgZWFjaCBjb21iaW5hdGlvbiBvZiBxdWFsaXR5IG1ldHJpYyBhbmQgbnVtYmVyIG9mIGxvdy1xdWFsaXR5IHNhbXBsZXMgaW4gdGhlIHRyYWluaW5nIHNldC4gTGV0J3Mgbm93IGV2YWx1YXRlIHRoZSByZXN1bHRzLiBMZXQncyBzdGFydCBieSByZWFkaW5nIHRoZSBkYXRhLgoKYGBge3J9CmRmID0gcmVhZF9jc3YoJ3NhbXBsZV9xdWFsaXR5LmNzdicpWy0xXQoKZGYgPSBkZiAlPiUKICBtdXRhdGUoY29ycmVjdF9wcmVkID0gdmFsaWRfYWN0dWFsID09IHZhbGlkX3ByZWRpY3Rpb24pCgpkZgpgYGAKCkl0IHNlZW1zIHRoYXQgaW4gZ2VuZXJhbCBpbmNsdWRpbmcgc29tZSBsb3cgcXVhbGl0eSBzYW1wbGVzIChieSB0aGUgdmFyaWF0aW9uIGluIGNvbnRlbnQgbWV0cmljKSBtYXkgaW1wcm92ZSBoaWdoLXF1YWxpdHkgc2FtcGxlcyBhIGxpdHRsZSBiaXQsIGJ1dCBvbmx5IGluY3JlYXNlcyB2YXJpYXRpb24gb2YgbG93IHF1YWxpdHkgc2FtcGxlcyBpbnN0ZWFkIG9mIGNsZWFybHkgaW1wcm92aW5nIHRoZW0uIAoKYGBge3J9CnAgPSBkZiAlPiUKICBmaWx0ZXIocXVhbF9tZXRyaWMgPT0gJ2hpZ2hfY19zZCcpICU+JQogIGdyb3VwX2J5KHJlcGxpY2F0ZSwgc2FtcGxlX3ZhbGlkLCBuX2xvd3F1YWxfdHJhaW5pbmcpICU+JQogIGZpbHRlcihicF92YWxpZCA9PSBtaW4oYnBfdmFsaWQpKSAlPiUKICBncm91cF9ieShyZXBsaWNhdGUsIG5fbG93cXVhbF90cmFpbmluZywgdmFsaWRfbG93cXVhbCkgJT4lCiAgc3VtbWFyaXplKG1lYW5fYWNjID0gbWVhbihjb3JyZWN0X3ByZWQpKSAlPiUKICBtdXRhdGUodmFsaWRfbG93cXVhbCA9IGMoJ1RSVUUnID0gJ1ZhbGlkYXRpb24gYWNjdXJhY3kgZm9yIGxvdyBxdWFsaXR5IHNhbXBsZXMnLCAnRkFMU0UnID0gJ1ZhbGlkYXRpb24gYWNjdXJhY3kgZm9yIGhpZ2gtcXVhbGl0eSBzYW1wbGVzJylbYXMuY2hhcmFjdGVyKHZhbGlkX2xvd3F1YWwpXSkgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21faGlzdG9ncmFtKGFlcyh4ID0gbWVhbl9hY2MpLCBib3VuZGFyeSA9IDEpICsKICBzY2FsZV95X2NvbnRpbnVvdXMoc2VjLmF4aXMgPSBzZWNfYXhpcygnaWRlbnRpdHknLCBuYW1lID0gJ051bWJlciBvZiBsb3cgcXVhbGl0eSBzYW1wbGVzIGluIHRyYWluaW5nIHNldCcsYnJlYWtzID0gTlVMTCwgbGFiZWxzID0gTlVMTCwgZ3VpZGUgPSBOVUxMKSkgKyAKICBzY2FsZV94X2NvbnRpbnVvdXMobGltaXRzID0gYygwLDEpKSArCiAgeGxhYignQXZlcmFnZSB2YWxpZGF0aW9uIGFjY3VyYWN5IGFjcm9zcyBhbGwgc2FtcGxlcycpICsKICB5bGFiKCdGcmVxdWVuY3kgYWNyb3NzIHJlcGxpY2F0ZXMnKSArCiAgbGFicyh0aXRsZSA9ICdFZmZlY3Qgb2YgcXVhbGl0eSBkZXRlcm1pbmVkIGJ5IHZhcmlhdGlvbiBpbiBHQyBjb250ZW50IG9uIGFjY3VyYWN5JykgKyAKICBmYWNldF9ncmlkKG5fbG93cXVhbF90cmFpbmluZ352YWxpZF9sb3dxdWFsKSArCiAgdGhlbWVfZmV3KCkgKwogIHRoZW1lKHN0cmlwLmJhY2tncm91
bmQgPSBlbGVtZW50X3JlY3QoZmlsbD1ncmF5KDAuOCkpLAogICAgICAgIHBsb3QudGl0bGUgPSBlbGVtZW50X3RleHQoaGp1c3QgPSAwLjUpCiAgICAgICAgKQoKcAoKZ2dzYXZlKGZpbGVuYW1lID0gJ3F1YWxpdHlfY29udGVudC5wZGYnLHBsb3QgPXAsZGV2aWNlPSdwZGYnLHBhdGggPSAncGFwZXJfaW1hZ2VzJyx3aWR0aCA9IDcsaGVpZ2h0ID0gNSx1bml0cyA9ICdpbicpCmdnc2F2ZShmaWxlbmFtZSA9ICdxdWFsaXR5X2NvbnRlbnQucG5nJyxwbG90ID1wLGRldmljZT0ncG5nJyxwYXRoID0gJ3BhcGVyX2ltYWdlcycsd2lkdGggPSA3LGhlaWdodCA9IDUsdW5pdHMgPSAnaW4nKQoKCgoKYGBgClRoZSBlZmZlY3QgaXMgbGVzcyBwcm9ub3VuY2VkIGZvciBhdmVyYWdlIGluc2VydCBzaXplCmBgYHtyfQpwID0gZGYgJT4lCiAgZmlsdGVyKHF1YWxfbWV0cmljID09ICdsb3dfc2l6ZScpICU+JQogIGdyb3VwX2J5KHJlcGxpY2F0ZSwgc2FtcGxlX3ZhbGlkLCBuX2xvd3F1YWxfdHJhaW5pbmcpICU+JQogIGZpbHRlcihicF92YWxpZCA9PSBtYXgoYnBfdmFsaWQpKSAlPiUKICBncm91cF9ieShyZXBsaWNhdGUsIG5fbG93cXVhbF90cmFpbmluZywgdmFsaWRfbG93cXVhbCkgJT4lCiAgc3VtbWFyaXplKG1lYW5fYWNjID0gbWVhbihjb3JyZWN0X3ByZWQpKSAlPiUKICBtdXRhdGUodmFsaWRfbG93cXVhbCA9IGMoJ1RSVUUnID0gJ1ZhbGlkYXRpb24gYWNjdXJhY3kgZm9yIGxvdyBxdWFsaXR5IHNhbXBsZXMnLCAnRkFMU0UnID0gJ1ZhbGlkYXRpb24gYWNjdXJhY3kgZm9yIGhpZ2gtcXVhbGl0eSBzYW1wbGVzJylbYXMuY2hhcmFjdGVyKHZhbGlkX2xvd3F1YWwpXSkgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21faGlzdG9ncmFtKGFlcyh4ID0gbWVhbl9hY2MpLCBib3VuZGFyeSA9IDEpICsKICBzY2FsZV95X2NvbnRpbnVvdXMoc2VjLmF4aXMgPSBzZWNfYXhpcygnaWRlbnRpdHknLCBuYW1lID0gJ051bWJlciBvZiBsb3cgcXVhbGl0eSBzYW1wbGVzIGluIHRyYWluaW5nIHNldCcsYnJlYWtzID0gTlVMTCwgbGFiZWxzID0gTlVMTCwgZ3VpZGUgPSBOVUxMKSkgKyAKICBzY2FsZV94X2NvbnRpbnVvdXMobGltaXRzID0gYygwLDEpKSArCiAgeGxhYignQXZlcmFnZSB2YWxpZGF0aW9uIGFjY3VyYWN5IGFjcm9zcyBhbGwgc2FtcGxlcycpICsKICB5bGFiKCdGcmVxdWVuY3kgYWNyb3NzIHJlcGxpY2F0ZXMnKSArCiAgbGFicyh0aXRsZSA9ICdFZmZlY3Qgb2Ygc2VxdWVuY2luZyBxdWFsaXR5IGRldGVybWluZWQgYnkgaW5zZXJ0IHNpemUgb24gYWNjdXJhY3knKSArIAogIGZhY2V0X2dyaWQobl9sb3dxdWFsX3RyYWluaW5nfnZhbGlkX2xvd3F1YWwpICsKICB0aGVtZV9mZXcoKSArCiAgdGhlbWUoc3RyaXAuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsPWdyYXkoMC44KSksCiAgICAgICAgcGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDAuNSkKICAgICAgICApCgpwCgpnZ3NhdmUoZmlsZW5hbWUgPSAncXVhbGl0eV9zaXplLnBk
ZicscGxvdCA9cCxkZXZpY2U9J3BkZicscGF0aCA9ICdwYXBlcl9pbWFnZXMnLHdpZHRoID0gNyxoZWlnaHQgPSA1LHVuaXRzID0gJ2luJykKZ2dzYXZlKGZpbGVuYW1lID0gJ3F1YWxpdHlfc2l6ZS5wbmcnLHBsb3QgPXAsZGV2aWNlPSdwbmcnLHBhdGggPSAncGFwZXJfaW1hZ2VzJyx3aWR0aCA9IDcsaGVpZ2h0ID0gNSx1bml0cyA9ICdpbicpCmBgYAoKV2hhdCBpZiB3ZSBvcmRlciBhbGwgc2FtcGxlcyBieSB0aGVpciB2YWxpZGF0aW9uIGFjY3VyYWN5IGFuZCBjb21wYXJlIHRvIHRoZSBxdWFsaXR5IG1ldHJpY3MsIHdoYXQgZG8gd2Ugc2VlPwoKYGBge3J9CmRmX2luZm8gPSByZWFkX2Nzdignc2FtcGxlX2luZm9fc3RhdHMuY3N2JylbLTFdCmRmX2luZm8KYGBgClRoZXJlIHNlZW1zIHRvIGJlIGEgd2VhayBuZWdhdGl2ZSBjb3JyZWxhdGlvbiBiZXR3ZWVuIHZhcmlhdGlvbiBpbiBjb250ZW50IGFuZCBhY2N1cmFjeSwgYnV0IG1hbnkgc2FtcGxlcyB0aGF0IHNlZW0gdG8gYmUgZ29vZCB3aXRoIHRoaXMgbWV0cmljIGhhdmUgYWx3YXlzIGxvdyBhY2N1cmFjeS4KCmBgYHtyfQpkZiAlPiUKICBmaWx0ZXIocXVhbF9tZXRyaWMgPT0gJ2hpZ2hfY19zZCcpICU+JQogIGdyb3VwX2J5KHJlcGxpY2F0ZSwgc2FtcGxlX3ZhbGlkLCBuX2xvd3F1YWxfdHJhaW5pbmcpICU+JQogIGZpbHRlcihicF92YWxpZCA9PSBtYXgoYnBfdmFsaWQpKSAlPiUKICBncm91cF9ieShzYW1wbGVfdmFsaWQsIG5fbG93cXVhbF90cmFpbmluZykgJT4lCiAgc3VtbWFyaXplKG1lYW5fYWNjID0gbWVhbihjb3JyZWN0X3ByZWQpKSAlPiUKICBsZWZ0X2pvaW4oZGZfaW5mbyxieSA9IGMoJ3NhbXBsZV92YWxpZCcgPSAnbGlicmFyeV9pZCcpKSAlPiUKICBnZ3Bsb3QoKSArCiAgc2NhbGVfeF9zcXJ0KCkgKwogIGdlb21faml0dGVyKGFlcyh4ID0gY29udGVudF9zZCwgeSA9IG1lYW5fYWNjLCBjb2xvciA9IHNwZWNpZXMpLHdpZHRoID0gMCwgaGVpZ2h0ID0gMC4wNSkgKwogIHNjYWxlX2NvbG9yX3ZpcmlkaXNfZChvcHRpb24gPSAndHVyYm8nKSArCiAgZmFjZXRfd3JhcCh+bl9sb3dxdWFsX3RyYWluaW5nKQpgYGAKQWdhaW4sIHRoaXMgaXMgbGVzcyBwcm9ub3VuY2VkIGZvciBpbnNlcnQgc2l6ZQpgYGB7cn0KZGYgJT4lCiAgZmlsdGVyKHF1YWxfbWV0cmljID09ICdsb3dfc2l6ZScpICU+JQogIGdyb3VwX2J5KHJlcGxpY2F0ZSwgc2FtcGxlX3ZhbGlkLCBuX2xvd3F1YWxfdHJhaW5pbmcpICU+JQogIGZpbHRlcihicF92YWxpZCA9PSBtaW4oYnBfdmFsaWQpKSAlPiUKICBncm91cF9ieShzYW1wbGVfdmFsaWQsIG5fbG93cXVhbF90cmFpbmluZykgJT4lCiAgc3VtbWFyaXplKG1lYW5fYWNjID0gbWVhbihjb3JyZWN0X3ByZWQpKSAlPiUKICBsZWZ0X2pvaW4oZGZfaW5mbyxieSA9IGMoJ3NhbXBsZV92YWxpZCcgPSAnbGlicmFyeV9pZCcpKSAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9qaXR0ZXIoYWVzKHggPSBpbnNlcnRfc2l6ZSwgeSA9IG1lYW5fYWNjLCBjb2xvciA9
IHNwZWNpZXMpLHdpZHRoID0gMCwgaGVpZ2h0ID0gMC4wNSkgKwogIHNjYWxlX2NvbG9yX3ZpcmlkaXNfZChvcHRpb24gPSAndHVyYm8nKSArCiAgZmFjZXRfd3JhcCh+bl9sb3dxdWFsX3RyYWluaW5nKQpgYGAKV2hhdCBpcyB0aGUgcmVsYXRpb25zaGlwIGJldHdlZW4gRE5BIGV4dHJhY3Rpb24geWllbGQgYW5kIGxpYnJhcnkgcXVhbGl0eT8KCkZpcnN0LCBsZXQncyBwbG90IGFnYWluc3Qgc3RhbmRhcmQgZGV2aWF0aW9uLgpgYGB7cn0KcDEgPSBkZl9pbmZvICU+JQogIG11dGF0ZShkbmFfYyA9IGlmZWxzZShkbmFfY29uY2VudHJhdGlvbiA9PSAndG9vIGhpZ2gnLCAyMDAsIGRuYV9jb25jZW50cmF0aW9uKSwKICAgICAgICAgZG5hX2MgPSBhcy5udW1lcmljKGRuYV9jKSwKICAgICAgICAgZG5hX2MgPSBpZmVsc2UoZG5hX2MgPT0gMCwgMC4wNSwgZG5hX2MpKSAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9wb2ludChhZXMoZG5hX2MsIGNvbnRlbnRfc2QpKSArCiAgc2NhbGVfeV9sb2cxMChuYW1lID0gJ1N0YW5kYXJkIGRldmlhdGlvbiBpbiBiYXNlIGNvbnRlbnQnKSArCiAgc2NhbGVfeF9sb2cxMChuYW1lID0gJ0ROQSB5aWVsZCAobmcvdUwpJywgYnJlYWtzID0gYygwLjA1LDAuMSwxLDEwLDEwMCwyMDApLCBsYWJlbHMgPSBjKCd0b29cbmxvdycsIDAuMSwgMSwgMTAsIDEwMCwgJ3Rvb1xuaGlnaCcpKSArCiAgdGhlbWVfZmV3KCkKCnAxCmBgYApOb3csIGFnYWluc3QgaW5zZXJ0IHNpemUKYGBge3J9CnAyID0gZGZfaW5mbyAlPiUKICBtdXRhdGUoZG5hX2MgPSBpZmVsc2UoZG5hX2NvbmNlbnRyYXRpb24gPT0gJ3RvbyBoaWdoJywgMjAwLCBkbmFfY29uY2VudHJhdGlvbiksCiAgICAgICAgIGRuYV9jID0gYXMubnVtZXJpYyhkbmFfYyksCiAgICAgICAgIGRuYV9jID0gaWZlbHNlKGRuYV9jID09IDAsIDAuMDUsIGRuYV9jKSkgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21fcG9pbnQoYWVzKGRuYV9jLCBpbnNlcnRfc2l6ZSkpICsKICBzY2FsZV95X2NvbnRpbnVvdXMobmFtZSA9ICdJbnNlcnQgc2l6ZSAoYnApJykgKwogIHNjYWxlX3hfbG9nMTAobmFtZSA9ICdETkEgeWllbGQgKG5nL3VMKScsIGJyZWFrcyA9IGMoMC4wNSwwLjEsMSwxMCwxMDAsMjAwKSwgbGFiZWxzID0gYygndG9vXG5sb3cnLCAwLjEsIDEsIDEwLCAxMDAsICd0b29cbmhpZ2gnKSkgKwogIHRoZW1lX2ZldygpCgpwMgpgYGAKCmBgYHtyfQp0aXRsZV9wbG90IDwtIGdncGxvdCgpICsgCiAgbGFicyh0aXRsZSA9ICJDb3JyZWxhdGlvbiBiZXR3ZWVuIEROQSB5aWVsZCBhbmQgcXVhbGl0eSBtZXRyaWNzIikgKyAKICB0aGVtZV92b2lkKCkgKyAgIyBSZW1vdmUgYXhlcywgbGVnZW5kLCBldGMuCiAgdGhlbWUocGxvdC50aXRsZSA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDAuNSwgc2l6ZSA9IDEyLCBmYWNlID0gImJvbGQiLHZqdXN0PTEpLAogICAgICAgIHBsb3QuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsPSJ3aGl0ZSIsY29sb3I9IndoaXRlIikpICAjIENlbnRl
ciB0aGUgdGl0bGUKCgpwID0gY293cGxvdDo6cGxvdF9ncmlkKAogIHRpdGxlX3Bsb3QsCiAgY293cGxvdDo6cGxvdF9ncmlkKHAxLHAyLGxhYmVscyA9ICJBVVRPIixuY29sPTEpLAogIG5jb2wgPSAxLAogIHJlbF9oZWlnaHRzID0gYygwLjA1LDAuOTUpICAjIEFkanVzdCB0aGUgcmVsYXRpdmUgaGVpZ2h0cyBhcyBuZWVkZWQKKQogIAoKcApnZ3NhdmUoZmlsZW5hbWUgPSAneWllbGRfdnNfcXVhbGl0eS5wZGYnLHBsb3QgPXAsZGV2aWNlPSdwZGYnLHBhdGggPSAncGFwZXJfaW1hZ2VzJyx3aWR0aCA9IDUsaGVpZ2h0ID0gOC41LHVuaXRzID0gJ2luJykKZ2dzYXZlKGZpbGVuYW1lID0gJ3lpZWxkX3ZzX3F1YWxpdHkucG5nJyxwbG90ID1wLGRldmljZT0ncG5nJyxwYXRoID0gJ3BhcGVyX2ltYWdlcycsd2lkdGggPSA1LGhlaWdodCA9IDguNSx1bml0cyA9ICdpbicpCmBgYAoKCgpCb3R0b21saW5lOiBhcyBsb25nIGFzIHRoZSBtYWpvcml0eSBvZiB0aGUgc2FtcGxlcyBmb3IgZWFjaCBzcGVjaWVzIGFyZSBoaWdoLXF1YWxpdHksIGhhdmluZyBsb3ctcXVhbGl0eSBzYW1wbGVzIGluIHRoZSB0cmFpbmluZyBzZXQgc2hvdWxkIG5vdCBjYXVzZSBtdWNoIHRyb3VibGUgYW5kIG1pZ2h0IGV2ZW4gaW1wcm92ZSBpbmZlcmVuY2UgZm9yIHNvbWUgbG93LXF1YWxpdHkgc2FtcGxlcy4KCiMgTnVtYmVyIG9mIHNhbXBsZXMgcGVyIHNwZWNpZXMgeCBETkEgcXVhbGl0eQpOb3cgbGV0J3MgZXZhbHVhdGUgdGhlIGVmZmVjdCBvZiBudW1iZXIgb2Ygc2FtcGxlcyBwZXIgc3BlY2llcy4KCmBgYHtyfQpkZiA9IHJlYWRfY3N2KCduX3RyYWluaW5nLmNzdicpWy0xXQoKZGYgPSBkZiAlPiUKICBtdXRhdGUoY29ycmVjdF9wcmVkID0gdmFsaWRfYWN0dWFsID09IHZhbGlkX3ByZWRpY3Rpb24pCgpkZgpgYGAKCkRvZXMgdGhlIG51bWJlciBvZiBzYW1wbGVzIHVzZWQgaW4gdHJhaW5pbmcgaW1wYWN0IHRoZSB2YWxpZGF0aW9uIGFjY3VyYWN5PwpMZXQncyBwbG90IG9uZSBwYW5lbCBmb3IgZWFjaCBzYW1wbGUuIEl0IHNlZW1zIGl0IGRvZXMuCmBgYHtyIGZpZy5oZWlnaHQ9NTB9CnAgPSBkZiAlPiUKICBncm91cF9ieShuX3NhbXBfdHJhaW5pbmcsIGJwX3ZhbGlkLCBzYW1wbGVfdmFsaWQsIHZhbGlkX2FjdHVhbCkgJT4lCiAgc3VtbWFyaXplKG1lYW5fYWNjID0gbWVhbihjb3JyZWN0X3ByZWQpKSAlPiUKICBnZ3Bsb3QoKSArCiAgI2dlb21faml0dGVyKGFlcyh4ID0gbl9zYW1wX3RyYWluaW5nLzEwLCB5ID0gbWVhbl9hY2MpKSArCiAgZ2VvbV9ib3hwbG90KGFlcyh4ID0gbl9zYW1wX3RyYWluaW5nLzEwLCB5ID0gbWVhbl9hY2MsIGdyb3VwID0gbl9zYW1wX3RyYWluaW5nLzEwKSkgKwogIGZhY2V0X3dyYXAodmFsaWRfYWN0dWFsfnNhbXBsZV92YWxpZCkgKwogIHRoZW1lX2ZldygpCgpwCmBgYAoKTGV0J3Mgbm93IHBsb3Qgb25seSB0aGUgYXZlcmFnZSBhY2N1cmFjeSBmb3IgZWFjaCBzYW1wbGUgYWNyb3NzIHJlcGxpY2F0ZXMsIHdpdGggZWFjaCBzYW1w
bGUgcmVwcmVzZW50ZWQgYnkgYSBsaW5lLgoKSXQgc2VlbXMgdGhhdCBtb3JlIHNhbXBsZXMgaW4gdGhlIHRyYWluaW5nIHNldCBkb2VzIGhlbHAsIGJ1dCBmb3IgbW9zdCBjYXNlcyBhYm91dCA0IHNhbXBsZXMgaXMgYWxyZWFkeSBwcmV0dHkgZ29vZC4gTGV0J3MgcGxvdCBjb2xvcmluZyBieSBzcGVjaWVzCmBgYHtyfQpkZiAlPiUKICBncm91cF9ieShuX3NhbXBfdHJhaW5pbmcsIHNhbXBsZV92YWxpZCwgdmFsaWRfYWN0dWFsKSAlPiUKICBzdW1tYXJpemUobWVhbl9hY2MgPSBtZWFuKGNvcnJlY3RfcHJlZCkpICU+JQogIG11dGF0ZSh2YWxpZF9hY3R1YWwgPSBmY3RfcmVvcmRlcih2YWxpZF9hY3R1YWwsbWVhbl9hY2MpKSAlPiUKICBnZ3Bsb3QoKSArCiAgZ2VvbV9saW5lKGFlcyh4ID0gbl9zYW1wX3RyYWluaW5nLzEwLCB5ID0gbWVhbl9hY2MsIGdyb3VwID0gc2FtcGxlX3ZhbGlkLCBjb2xvciA9IHZhbGlkX2FjdHVhbCwgbGluZXR5cGUgPSB2YWxpZF9hY3R1YWwpKSArCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoZmV3X3BhbCgnRGFyaycpKDUpLGZld19wYWwoJ0RhcmsnKSg1KSkpICsKICBzY2FsZV9saW5ldHlwZV9tYW51YWwodmFsdWVzID0gcmVwKDE6MixlYWNoID0gNSkpICsKICB0aGVtZV9mZXcoKQpgYGAKTm93IGxldCdzIHRyeSB0byB1c2UgbGluZSB0eXBlIGJ5IHNhbXBsZSBxdWFsaXR5IGluc3RlYWQuCgpgYGB7cn0KCmRmX3Bsb3QgPSBkZiAlPiUKICBncm91cF9ieShuX3NhbXBfdHJhaW5pbmcsIHNhbXBsZV92YWxpZCwgdmFsaWRfYWN0dWFsKSAlPiUKICBzdW1tYXJpemUobWVhbl9hY2MgPSBtZWFuKGNvcnJlY3RfcHJlZCkpICU+JQogIG11dGF0ZSh2YWxpZF9hY3R1YWwgPSBmY3RfcmVvcmRlcih2YWxpZF9hY3R1YWwsbWVhbl9hY2MpKSAlPiUKICBsZWZ0X2pvaW4oZGZfaW5mbyAlPiUgCiAgICAgICAgICAgICAgbXV0YXRlKHNhbXBsZV92YWxpZCA9IHBhc3RlMCgnUy0nLHNhbXBsZV9udW1iZXIpKSAlPiUgCiAgICAgICAgICAgICAgbXV0YXRlKGRuYV9jb25jZW50cmF0aW9uID0gaWZlbHNlKGRuYV9jb25jZW50cmF0aW9uID09ICd0b28gaGlnaCcsMTUwLGRuYV9jb25jZW50cmF0aW9uKSkgJT4lCiAgICAgICAgICAgICAgbXV0YXRlKGRuYV9jb25jZW50cmF0aW9uID0gYXMubnVtZXJpYyhkbmFfY29uY2VudHJhdGlvbikpICU+JQogICAgICAgICAgICAgIG11dGF0ZShoaWdocXVhbCA9IGRuYV9jb25jZW50cmF0aW9uID49IHF1YW50aWxlKGRuYV9jb25jZW50cmF0aW9uLHByb2JzPTAuNSkpICU+JQogICAgICAgICAgICAgIHNlbGVjdChzYW1wbGVfdmFsaWQsIGhpZ2hxdWFsKSkKCmRmX3JpYmJvbiA9IGRmX3Bsb3QgJT4lCiAgZ3JvdXBfYnkobl9zYW1wX3RyYWluaW5nKSAlPiUKICBzdW1tYXJpc2UocTEgPSBxdWFudGlsZShtZWFuX2FjYywwLjI1KSwKICAgICAgICAgICAgbWVkaWFuID0gbWVkaWFuKG1lYW5fYWNjKSwKICAgICAgICAgICAgcTMgPSBxdWFudGlsZShtZWFuX2FjYywgMC43NSkp
CgoKcCA9ICBnZ3Bsb3QoZGZfcGxvdCkgKwogIHN0YXRfc3VtbWFyeShhZXMoeCA9IG5fc2FtcF90cmFpbmluZy8xMCwgeSA9IG1lYW5fYWNjKSwgZmlsbCA9ICdwaW5rJywgZnVuLm1heCA9IGZ1bmN0aW9uKHgpe3F1YW50aWxlKHgsMC43NSl9LGZ1bi5taW4gPSBmdW5jdGlvbih4KXtxdWFudGlsZSh4LDAuMjUpfSwgZ2VvbT0ncmliYm9uJykgKwogIGdlb21fbGluZShhZXMoeCA9IG5fc2FtcF90cmFpbmluZy8xMCwgeSA9IG1lYW5fYWNjLCBncm91cCA9IHNhbXBsZV92YWxpZCwgbGluZXR5cGUgPSBoaWdocXVhbCksIGFscGhhID0gMC41LCBzaXplID0gMC4yNSkgKwogIHN0YXRfc3VtbWFyeShhZXMoeCA9IG5fc2FtcF90cmFpbmluZy8xMCwgeSA9IG1lYW5fYWNjKSwgY29sb3IgPSAncmVkJywgc2l6ZSA9IDAuNSwgZnVuID0gJ21lZGlhbicsIGdlb209J2xpbmUnKSArCiAgc2NhbGVfbGluZXR5cGVfbWFudWFsKHZhbHVlcyA9IGMoJ1RSVUUnID0gInNvbGlkIiwgJ0ZBTFNFJyA9ICI1MSIpLCBuYW1lID0gJ0ROQSB5aWVsZCcsIGxhYmVscyA9IGMoJ1RSVUUnID0gJ0hpZ2gnLCAnRkFMU0UnID0gJ0xvdycpKSArCiAgc2NhbGVfeF9jb250aW51b3VzKGJyZWFrcz0xOjcpICsKICB5bGFiKCdBdmVyYWdlIHZhbGlkYXRpb24gYWNjdXJhY3knKSArCiAgeGxhYignVHJhaW5pbmcgc2FtcGxlcyBwZXIgc3BlY2llcycpICsKICB0aGVtZV9mZXcoYmFzZV9zaXplID0gNikgKwogIHRoZW1lKGxlZ2VuZC5rZXkuc2l6ZSA9IHVuaXQoMC4yLCAiY20iKSkKCnAKYGBgCgpUaGUgZ3JhcGggaXMgYSBsaXR0bGUgY2x1dHRlcmVkLCBsZXQncyBub3cgZG8gYSB2ZXJzaW9uIGZvciB0aGUgZmluYWwgZmlndXJlIGluIHRoZSBwYXBlcjoKYGBge3J9CgpkZl9mYWNldF9wbG90ID0gZGZfcGxvdCAlPiUKICB1bmdyb3VwICU+JQogIGxlZnRfam9pbihzZWxlY3QocmVhZF9jc3YoJ3NhbXBsZV9pbmZvX3N0YXRzLmNzdicpLCBzYW1wbGVfdmFsaWQgPSBsaWJyYXJ5X2lkLCBjb250ZW50X3NkKSkgJT4lCiAgbXV0YXRlKGRuYV9xdWFsaXR5ID0gbnRpbGUoMS1jb250ZW50X3NkLCAxMDApKSAlPiUKICBtdXRhdGUodmFsaWRfYWN0dWFsID0gZmN0X3Jlb3JkZXIodmFsaWRfYWN0dWFsLG1lYW5fYWNjLC5mdW4gPSBtZWFuLC5kZXNjID0gVCkpCgpwID0gIGdncGxvdChkZl9mYWNldF9wbG90KSArCiAgI3N0YXRfc3VtbWFyeShhZXMoeCA9IG5fc2FtcF90cmFpbmluZy8xMCwgeSA9IG1lYW5fYWNjKSwgZmlsbCA9IGdyYXkoLjgpLCBmdW4ubWF4ID0gZnVuY3Rpb24oeCl7cXVhbnRpbGUoeCwwLjc1LCB0eXBlID00KX0sZnVuLm1pbiA9IGZ1bmN0aW9uKHgpe3F1YW50aWxlKHgsMC4yNSx0eXBlID00KX0sIGdlb209J3JpYmJvbicpICsKICBnZW9tX2xpbmUoYWVzKHggPSBuX3NhbXBfdHJhaW5pbmcvMTAsIHkgPSBtZWFuX2FjYywgZ3JvdXAgPSBzYW1wbGVfdmFsaWQsIGNvbG9yID0gZG5hX3F1YWxpdHkpLCBhbHBoYSA9IDAuNSkgKwogIHN0YXRfc3VtbWFyeShhZXMo
eCA9IG5fc2FtcF90cmFpbmluZy8xMCwgeSA9IG1lYW5fYWNjKSwgY29sb3IgPSAnYmxhY2snLCBzaXplID0gMC41LCBsaW5ldHlwZSA9ICdkYXNoZWQnLCBmdW4gPSAnbWVhbicsIGdlb209J2xpbmUnKSArCiAgc2NhbGVfY29sb3JfdmlyaWRpc19jKG5hbWUgPSdETkEgcXVhbGl0eSByYW5rJykgKwogICNzY2FsZV9saW5ldHlwZV9tYW51YWwodmFsdWVzID0gYygnVFJVRScgPSAic29saWQiLCAnRkFMU0UnID0gIjUxIiksIG5hbWUgPSAnRE5BIHF1YWxpdHknLCBsYWJlbHMgPSBjKCdUUlVFJyA9ICdIaWdoJywgJ0ZBTFNFJyA9ICdMb3cnKSkgKwogIHNjYWxlX3hfY29udGludW91cyhicmVha3M9MTo3KSArCiAgeWxhYignQXZlcmFnZSB2YWxpZGF0aW9uIGFjY3VyYWN5JykgKwogIHhsYWIoJ1RyYWluaW5nIHNhbXBsZXMgcGVyIHNwZWNpZXMnKSArCiAgZmFjZXRfd3JhcCh+dmFsaWRfYWN0dWFsLG5yb3cgPSAxKSArCiAgdGhlbWVfZmV3KGJhc2Vfc2l6ZSA9IDYpICsKICB0aGVtZShsZWdlbmQua2V5LnNpemUgPSB1bml0KDAuMiwgImNtIikpCgpwCgpnZ3NhdmUoZmlsZW5hbWUgPSAnbl9zYW1wbGVzLnBuZycscGxvdCA9cCxkZXZpY2U9J3BuZycscGF0aCA9ICdwYXBlcl9pbWFnZXMnLHdpZHRoID0gMTYsaGVpZ2h0ID0gNSx1bml0cyA9ICdjbScsZHBpID0gMjQwMCkKYGBgCgoKRmluYWxseSwgbGV0J3MgcGxvdCB0aGUgYWN0dWFsIHZhcktvZGVzIGZvciBTaXRnbWFwaHlsbG9uLCBlYWNoIHNwZWNpZXMgaW4gcm93LCBvcmRlcmVkIGJ5IHF1YWxpdHkuIFdlIHN0YXJ0IGJ5IGdlbmVyYXRpbmcgdGhlIGFwcHJvcHJpYXRlIGRhdGFmcmFtZToKCmBgYHtyfQpmaW5kX2ltYWdlID0gZnVuY3Rpb24oc2FtcGxlX2lkKXsKICB4ID0gbGlzdC5maWxlcyhwYXRoID0gJ2ltYWdlc183JyxwYXR0ZXJuID0gcGFzdGUwKCJeLitcXCsiLHNhbXBsZV9pZCwiXy4rIiksZnVsbC5uYW1lcyA9IFQpCiAgcmV0dXJuKHhbbGVuZ3RoKHgpXSkKfQoKCmRmX3ZhcktvZGVfcGxvdCA9IGRmX2ZhY2V0X3Bsb3QgJT4lIAogIGZpbHRlcihuX3NhbXBfdHJhaW5pbmc9PTcwKSAlPiUKICBzZWxlY3Qoc2FtcGxlX3ZhbGlkLHZhbGlkX2FjdHVhbCxjb250ZW50X3NkLG1lYW5fYWNjKSAlPiUKICBkaXN0aW5jdCgpICU+JQogIHJvd3dpc2UoKSAlPiUKICBtdXRhdGUoaW1hZ2VfcGF0aD1maW5kX2ltYWdlKHNhbXBsZV92YWxpZCkpICU+JQogIGdyb3VwX2J5KHZhbGlkX2FjdHVhbCkgJT4lCiAgYXJyYW5nZSgtY29udGVudF9zZCkgJT4lCiAgbXV0YXRlKHF1YWxpdHlfcmFuaz0xOm4oKSkgJT4lCiAgdW5ncm91cCgpICU+JQogIGFycmFuZ2UodmFsaWRfYWN0dWFsLHF1YWxpdHlfcmFuaykgJT4lCiAgbXV0YXRlKHZhbGlkX2FjdHVhbCA9IHN0cl9yZXBsYWNlX2FsbCh2YWxpZF9hY3R1YWwsIl8iLCIuICIpKQoKZGZfdmFyS29kZV9wbG90CmBgYAoKTm93IGxldCdzIHBsb3QKYGBge3J9CgpwID0gZ2dwbG90KGRmX3ZhcktvZGVfcGxvdCwgYWVzKHg9cXVhbGl0eV9yYW5r
LCB5PXZhbGlkX2FjdHVhbCkpICsKICBnZW9tX3RpbGUoYWVzKGZpbGw9bWVhbl9hY2MpLCBjb2xvcj0id2hpdGUiKSArCiAgc2NhbGVfZmlsbF92aXJpZGlzX2MoIkF2ZXJhZ2UgdmFsaWRhdGlvbiBhY2N1cmFjeSIsIG9wdGlvbiA9ICdtYWdtYScsIGxpbWl0cyA9IGMoMCwxKSxsYWJlbHM9c2NhbGVzOjpwZXJjZW50KSArCiAgZ2dpbWFnZTo6Z2VvbV9pbWFnZShhZXMoaW1hZ2U9aW1hZ2VfcGF0aCksIHNpemU9MC4wOSkgKwogIGNvb3JkX2VxdWFsKCkgKwogIHRoZW1lX21pbmltYWwoKSArCiAgZ2d0aXRsZShleHByZXNzaW9uKHBhc3RlKCJ2YXJLb2RlcyBmb3Igc3BlY2llcyBvZiAiLCBpdGFsaWMoIlN0aWdtYXBoeWxsb24iKSkpKSArCiAgeGxhYigiRE5BIHF1YWxpdHkgcmFuayIpICsKICB5bGFiKCJBY3R1YWwgc3BlY2llcyIpICsKICBzY2FsZV94X2Rpc2NyZXRlKCkgKwogIHRoZW1lKHBsb3QuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsPSJ3aGl0ZSIsIGNvbG9yID0gIndoaXRlIiksCiAgICAgICAgcGFuZWwuYmFja2dyb3VuZCA9IGVsZW1lbnRfcmVjdChmaWxsID0gIndoaXRlIiwgY29sb3IgPSAid2hpdGUiKSwKICAgICAgICBwbG90LnRpdGxlID0gZWxlbWVudF90ZXh0KGhqdXN0PTAuNSksCiAgICAgICAgcGFuZWwuZ3JpZCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLnRleHQueSA9IGVsZW1lbnRfdGV4dChmYWNlPSdpdGFsaWMnKSwKICAgICAgICBsZWdlbmQucG9zaXRpb24gPSAnYm90dG9tJykKCnByaW50KHApCgojIFNhdmUgdGhlIHBsb3QKZ2dzYXZlKGZpbGVuYW1lID0gJ3ZhcmtvZGVzX3F1YWxpdHkucG5nJywgcGxvdCA9IHAsIGRldmljZSA9ICdwbmcnLCBwYXRoID0gJ3BhcGVyX2ltYWdlcycsIHdpZHRoID0gNywgaGVpZ2h0ID0gNywgdW5pdHMgPSAnaW4nLCBkcGkgPSAxMjAwKQpnZ3NhdmUoZmlsZW5hbWUgPSAndmFya29kZXNfcXVhbGl0eS5wZGYnLCBwbG90ID0gcCwgZGV2aWNlID0gJ3BkZicsIHBhdGggPSAncGFwZXJfaW1hZ2VzJywgd2lkdGggPSA3LCBoZWlnaHQgPSA3LCB1bml0cyA9ICdpbicpCmBgYAoKCgoKCgoKCgo=